From 13fa720562f0c08e1917498254a31575ebeaea38 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guilhem=20Faur=C3=A9?= <pro@gfaure.eu>
Date: Wed, 24 May 2023 10:43:39 +0200
Subject: [PATCH] refactor to use Peewee objects extension in place of
 redefining every SPIP attribute

---
 spip2md/{converter.py => converters.py} | 125 ++++--------
 spip2md/database.py                     |   5 +-
 spip2md/items.py                        | 259 ------------------------
 spip2md/main.py                         |  80 ++++----
 spip2md/spipobjects.py                  | 212 +++++++++++++++++++
 5 files changed, 298 insertions(+), 383 deletions(-)
 rename spip2md/{converter.py => converters.py} (72%)
 delete mode 100644 spip2md/items.py
 create mode 100644 spip2md/spipobjects.py
diff --git a/spip2md/converter.py b/spip2md/converters.py
similarity index 72%
rename from spip2md/converter.py
rename to spip2md/converters.py
index 848a240..d17182a 100644
--- a/spip2md/converter.py
+++ b/spip2md/converters.py
@@ -3,7 +3,7 @@ from re import I, S, compile, finditer, sub
 from typing import Optional
 
 # SPIP syntax to Markdown
-spip_to_markdown = (
+SPIP_TO_MARKDOWN = (
     (  # horizontal rule
         compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", S | I),
         # r"---",
@@ -40,6 +40,14 @@ spip_to_markdown = (
         ),
         r"~\1~",
     ),
+    (  # images
+        compile(r"<(img|image)([0-9]+)(\|.*?)*>", S | I),
+        r"![](\1\2)",
+    ),
+    (  # documents & embeds
+        compile(r"<(doc|document|emb)([0-9]+)(\|.*?)*>", S | I),
+        r"[](\1\2)",
+    ),
     (  # anchor
         compile(r"\[ *(.*?) *-> *(.*?) *\]", S | I),
         r"[\1](\2)",
@@ -100,58 +108,20 @@ spip_to_markdown = (
         ),
         r"\1",
     ),
-)
-
-spip_to_text = (
-    (  # strong
-        compile(r"\{\{ *(.*?) *\}\}", S | I),
-        r"\1",
-    ),
-    (  # html strong
-        compile(r"<strong> *(.*?) *</strong>", S | I),
-        r"\1",
-    ),
-    (  # emphasis
-        compile(r"\{ *(.*?) *\}", S | I),
-        r"\1",
-    ),
-    (  # html emphasis
-        compile(r"<i> *(.*?) *<\/i>", S | I),
-        r"\1",
-    ),
-    (  # strikethrough
-        compile(
-            r"<del>\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)",
-            S | I,
-        ),
-        r"\1",
-    ),
-    (  # Keep only the first language in multi-language blocks
-        compile(
-            r"<multi>\s*(?:\[.{2,4}\])?\s*(.*?)\s*(?:\s*\[.{2,4}\].*)*<\/multi>",
-            S | I,
-        ),
-        r"\1",
-    ),
-    (  # remove every html tag
-        compile(r"<\/?.*?> *", S | I),
-        r"",
-    ),
-    (  # Remove beginning with angle bracket(s)
-        compile(r"^>+ +", S | I),
-        r"",
-    ),
-    (  # Remove beginning with a number followed by a dot
-        compile(r"^\d+\. +", S | I),
+    (  # WARNING remove every html tag
+        compile(r"<\/?.*?>\s*", S | I),
         r"",
     ),
 )
 
-# HTML tag WARNING can be used to remove them all
-html_tag = compile(r"<\/?.*?> *", S | I)
+# Further cleaning for metadata texts such as titles or descriptions
+SPIP_META_BLOAT = (
+    compile(r"^>+ +", S | I),  # Remove beginning with angle bracket(s)
+    compile(r"^\d+\. +", S | I),  # Remove beginning with a number followed by a dot
+)
 
 # Broken ISO encoding to proper UTF-8
-iso_to_utf = (
+ISO_TO_UTF = (
     (  # Fix UTF-8 appostrophe that was interpreted as ISO 8859-1
         "â€™",
         r"’",
@@ -264,82 +234,71 @@ iso_to_utf = (
 )
 
 # WARNING unknown broken encoding
-unknown_iso = (
+UNKNOWN_ISO = (
     r"â€¨",
     r"âˆ†",
     r"Ã»",
 )
 
 
-# Apply spip_to_markdown conversions to a text
-def convert_body(text: Optional[str]) -> str:
+# Apply SPIP to Markdown & ISO to UTF conversions to a text, & optionally clean meta
+def convert(text: Optional[str], clean_meta: bool = False) -> str:
     if text is None:
         return ""
-    for spip, markdown in spip_to_markdown:
+    for spip, markdown in SPIP_TO_MARKDOWN:
         text = spip.sub(markdown, text)
-    for iso, utf in iso_to_utf:
+    if clean_meta:  # For titles/descriptions: also drop every SPIP_META_BLOAT match
+        for bloat in SPIP_META_BLOAT:
+            text = bloat.sub("", text)
+    for iso, utf in ISO_TO_UTF:
         text = text.replace(iso, utf)
     return text
 
 
-# Apply spip_to_text conversions to a text
-def convert_meta(text: Optional[str]) -> str:
-    if text is None:
-        return ""
-    for spip, metadata in spip_to_text:
-        text = spip.sub(metadata, text)
-    for iso, utf in iso_to_utf:
-        text = text.replace(iso, utf)
-    return text
-
-
-# Replace images & documents in SPIP text with Markdown links with human-readable names
-def convert_documents(text: str, documents: list[tuple[int, str, str]]) -> str:
+# Replace the placeholder links left by convert() ("![](img12)", "[](doc12)") with each document's name & slug
+def link_documents(text: str, documents: list[tuple[int, str, str]]) -> str:
     for id, name, slug in documents:
+        # Untitled images: also consume the leading "!" so the result is not "!![…](…)"
         text = sub(
-            r"<(?:img|image)" + str(id) + r"(\|.*?)*>",
+            r"!?\[]\((?:img|image)" + str(id) + r"(\|.*?)*\)",
             f"![{name}]({slug})",
             text,
         )
+        # Untitled documents & embeds: use the document's own name as link text
         text = sub(
-            r"<(?:doc|emb)" + str(id) + r"(\|.*?)*>",
+            r"\[]\((?:doc|document|emb)" + str(id) + r"(\|.*?)*\)",
             f"[{name}]({slug})",
             text,
         )
+        # Titled images: "[^\]\n]" keeps the title inside one pair of brackets
         text = sub(
-            r"\[(.*?)\]\((?:doc|emb)" + str(id) + r"(\|.*?)*\)",
+            r"!?\[([^\]\n]+)\]\((?:img|image)" + str(id) + r"(\|.*?)*\)",
+            f"![\\1]({slug})",
+            text,
+        )
+        # Titled documents: same bracket-bounded title, kept as the link text
+        text = sub(
+            r"\[([^\]\n]+)\]\((?:doc|document|emb)" + str(id) + r"(\|.*?)*\)",
             f"[\\1]({slug})",
             text,
         )
     return text
 
 
-# Replace unknown chars with empty strings (delete them)
-def remove_unknown_chars(text: str) -> str:
-    for char in unknown_iso:
-        text.replace(char, "")
-    return text
-
-
-# Replace HTML tags chars with empty strings (delete them)
-def remove_tags(text: str) -> str:
-    return html_tag.sub("", text)
-
-
 # Return a list of tuples giving the start and end of unknown substring in text
 def unknown_chars(text: str) -> list[tuple[int, int]]:
     positions: list[tuple[int, int]] = []
-    for char in unknown_iso:
+    for char in UNKNOWN_ISO:
         for match in finditer("(" + char + ")+", text):
             positions.append((match.start(), match.end()))
     return positions
 
 
 # Return strings with unknown chards found in text, surrounded by context_length chars
-def get_unknown_chars(text: str, context_length: int = 20) -> list[str]:
+def unknown_chars_context(text: str, context_length: int = 20) -> list[str]:
     errors: list[str] = []
     context: str = r".{0," + str(context_length) + r"}"
-    for char in unknown_iso:
+    for char in UNKNOWN_ISO:
         matches = finditer(
             context + r"(?=" + char + r")" + char + r".*?(?=\r?\n|$)",
             text,
diff --git a/spip2md/database.py b/spip2md/database.py
index ec9fd47..eb75acb 100644
--- a/spip2md/database.py
+++ b/spip2md/database.py
@@ -1,4 +1,3 @@
-# pyright: basic
 # type: ignore
 from peewee import (
     SQL,
@@ -15,7 +14,7 @@ from peewee import (
     TextField,
 )
 
-db = MySQLDatabase(None)
+DB = MySQLDatabase(None)
 
 
 # class UnknownField(object):
@@ -25,7 +24,7 @@ db = MySQLDatabase(None)
 
 class BaseModel(Model):
     class Meta:
-        database: MySQLDatabase = db
+        database: MySQLDatabase = DB
 
 
 class SpipArticles(BaseModel):
diff --git a/spip2md/items.py b/spip2md/items.py
deleted file mode 100644
index f443db1..0000000
--- a/spip2md/items.py
+++ /dev/null
@@ -1,259 +0,0 @@
-# pyright: strict
-from os.path import basename, splitext
-from typing import Any, Optional
-
-from slugify import slugify
-from yaml import dump
-
-from converter import convert_body, convert_documents, convert_meta, remove_tags
-from database import (
-    SpipArticles,
-    SpipAuteurs,
-    SpipAuteursLiens,
-    SpipDocuments,
-    SpipDocumentsLiens,
-    SpipRubriques,
-)
-
-EXPORTTYPE: str = "md"
-
-
-class Iterator:
-    items: list[Any]
-
-    def __init__(self) -> None:
-        # Set the limit at the number of retrieved items
-        self.LIMIT: int = len(self.items)
-        # Start before the first element
-        self.count: int = -1
-
-    def __iter__(self):
-        return self
-
-    def __len__(self) -> int:
-        return self.LIMIT
-
-    def remaining(self) -> int:
-        return self.LIMIT - self.count
-
-    def __next__(self) -> Any:
-        self.count += 1
-        if self.remaining() <= 0:
-            raise StopIteration
-        return self.items[self.count]
-
-
-class Document:
-    def __init__(self, document: SpipDocuments) -> None:
-        self.id: int = document.id_document
-        self.thumbnail_id: int = document.id_vignette
-        self.title: str = convert_meta(document.titre)
-        self.date: str = document.date
-        self.description: str = convert_meta(document.descriptif)
-        self.file: str = document.fichier
-        self.draft: bool = document.statut == "publie"
-        self.creation: str = document.date
-        self.publication: str = document.date_publication
-        self.update: str = document.maj
-        self.media: str = document.media
-
-    def get_slug(self, date: bool = False) -> str:
-        name_type = splitext(basename(self.file))
-        return (
-            slugify((self.publication + "-" if date else "") + name_type[0])
-            + name_type[1]
-        )
-
-
-class Documents(Iterator):
-    def __init__(self, object_id: int) -> None:
-        # Query the DB to retrieve all documents related to object of id object_id
-        items = (
-            SpipDocuments.select()
-            .join(
-                SpipDocumentsLiens,
-                on=(SpipDocuments.id_document == SpipDocumentsLiens.id_document),
-            )
-            .where(SpipDocumentsLiens.id_objet == object_id)
-        )
-        self.items: list[Document] = [Document(i) for i in items]
-        super().__init__()
-
-
-class Item:
-    id: int
-
-    def __init__(self, item: SpipArticles | SpipRubriques):
-        self.title: str = convert_meta(item.titre)
-        self.section_id: int = item.id_rubrique
-        self.description: str = convert_meta(item.descriptif)
-        self.text: str = convert_body(item.texte)  # Convert SPIP to Markdown
-        self.publication: str = item.date
-        self.draft: bool = item.statut == "publie"
-        self.sector_id: int = item.id_secteur
-        self.update: str = item.maj
-        self.lang: str = item.lang
-        self.set_lang: bool = item.langue_choisie == "oui"  # TODO Why ?
-        self.translation_key: int = item.id_trad
-        self.extra: str = convert_body(item.extra)  # Probably unused
-
-    def get_slug(self, date: bool = False) -> str:
-        return slugify((self.publication + "-" if date else "") + self.title)
-
-    def get_filename(self) -> str:
-        return "index" + "." + self.lang + "." + EXPORTTYPE
-
-    def get_frontmatter(self, append: Optional[dict[str, Any]] = None) -> str:
-        return dump(
-            {
-                "lang": self.lang,
-                "translationKey": self.translation_key,
-                "title": self.title,
-                "publishDate": self.publication,
-                "lastmod": self.update,
-                "draft": self.draft,
-                "description": self.description,
-                # Debugging
-                "spip_id": self.id,
-                "spip_id_secteur": self.sector_id,
-            }
-            | append
-            if append is not None
-            else {},
-            allow_unicode=True,
-        )
-
-    def get_body(self) -> str:
-        body: str = ""
-        # Add the title as a Markdown h1
-        if len(self.title) > 0:
-            body += "\n\n# " + self.title
-        # If there is a text, add the text preceded by two line breaks
-        if len(self.text) > 0:
-            # Convert images & files links
-            text: str = convert_documents(
-                self.text,
-                [(d.id, d.title, d.get_slug()) for d in self.get_documents()],
-            )
-            # Remove remaining HTML after & append to body
-            body += "\n\n" + remove_tags(text)
-        # Same with an "extra" section
-        if len(self.extra) > 0:
-            body += "\n\n# EXTRA\n\n" + self.extra
-        return body
-
-    def get_content(self) -> str:
-        # Return the final article text
-        return "---\n" + self.get_frontmatter() + "---" + self.get_body()
-
-    def get_documents(self) -> Documents:
-        return Documents(self.id)
-
-
-class Article(Item):
-    def __init__(self, article: SpipArticles):
-        super().__init__(article)
-        self.id: int = article.id_article
-        self.surtitle: str = convert_meta(article.surtitre)  # Probably unused
-        self.subtitle: str = convert_meta(article.soustitre)  # Probably unused
-        self.caption: str = convert_body(article.chapo)  # Probably unused
-        self.ps: str = convert_body(article.ps)  # Probably unused
-        self.update_2: str = article.date_modif  # Probably unused duplicate of maj
-        self.creation: str = article.date_redac
-        self.forum: bool = article.accepter_forum == "oui"  # TODO Why ?
-        self.sitename: str = article.nom_site  # Probably useless
-        self.virtual: str = article.virtuel  # TODO Why ?
-        self.microblog: str = article.microblog  # Probably unused
-        # self.export = article.export  # USELESS
-        # self.views: int = article.visites  # USELESS in static
-        # self.referers: int = article.referers  # USELESS in static
-        # self.popularity: float = article.popularite  # USELESS in static
-        # self.version = article.id_version  # USELESS
-
-    def get_authors(self) -> list[SpipAuteurs]:
-        return (
-            SpipAuteurs.select()
-            .join(
-                SpipAuteursLiens,
-                on=(SpipAuteurs.id_auteur == SpipAuteursLiens.id_auteur),
-            )
-            .where(SpipAuteursLiens.id_objet == self.id)
-        )
-
-    def get_frontmatter(self, append: Optional[dict[str, Any]] = None) -> str:
-        return super().get_frontmatter(
-            {
-                "surtitle": self.surtitle,
-                "subtitle": self.subtitle,
-                "date": self.creation,
-                "authors": [author.nom for author in self.get_authors()],
-                # Debugging
-                "spip_id_rubrique": self.section_id,
-                "spip_id_secteur": self.sector_id,
-                "spip_chapo": self.caption,
-            }
-            | append
-            if append is not None
-            else {},
-        )
-
-    def get_body(self) -> str:
-        body: str = super().get_body()
-        # If there is a caption, add the caption followed by a hr
-        if hasattr(self, "caption") and len(self.caption) > 0:
-            body += "\n\n" + self.caption + "\n\n***"
-        # PS
-        if hasattr(self, "ps") and len(self.ps) > 0:
-            body += "\n\n# POST-SCRIPTUM\n\n" + self.ps
-        # Microblog
-        if hasattr(self, "microblog") and len(self.microblog) > 0:
-            body += "\n\n# MICROBLOGGING\n\n" + self.microblog
-        return body
-
-
-class Section(Item):
-    def __init__(self, section: SpipRubriques):
-        super().__init__(section)
-        self.id: int = section.id_rubrique
-        self.parent_id: int = section.id_parent
-        self.depth: int = section.profondeur
-        self.agenda: int = section.agenda
-
-    def get_filename(self) -> str:
-        return "_" + super().get_filename()
-
-    def get_articles(self, limit: int = 0):
-        return Articles(self.id, limit)
-
-
-class Articles(Iterator):
-    def __init__(self, section_id: int, limit: int = 0):
-        # Query the DB to retrieve all articles sorted by publication date
-        if limit > 0:
-            items = (
-                SpipArticles.select()
-                .where(SpipArticles.id_rubrique == section_id)
-                .order_by(SpipArticles.date.desc())
-                .limit(limit)
-            )
-        else:
-            items = (
-                SpipArticles.select()
-                .where(SpipArticles.id_rubrique == section_id)
-                .order_by(SpipArticles.date.desc())
-            )
-        self.items: list[Article] = [Article(i) for i in items]
-        super().__init__()
-
-
-class Sections(Iterator):
-    def __init__(self, limit: int = 0):
-        # Query the DB to retrieve all sections sorted by publication date
-        if limit > 0:
-            items = (
-                SpipRubriques.select().order_by(SpipRubriques.date.desc()).limit(limit)
-            )
-        else:
-            items = SpipRubriques.select().order_by(SpipRubriques.date.desc())
-        self.items: list[Section] = [Section(i) for i in items]
-        super().__init__()
diff --git a/spip2md/main.py b/spip2md/main.py
index 00b73b6..9bd0336 100755
--- a/spip2md/main.py
+++ b/spip2md/main.py
@@ -1,18 +1,19 @@
 #!python
-# pyright: strict
 from os import makedirs
 from os.path import expanduser
 from shutil import copyfile, rmtree
 from sys import argv
 
 from config import config
-from converter import get_unknown_chars, unknown_chars
-from database import db
-from items import (
+from converters import unknown_chars, unknown_chars_context
+from database import DB
+from spipobjects import (
     Article,
     Document,
-    Section,
-    Sections,
+    Rubrique,
+    get_articles,
+    get_documents,
+    get_sections,
 )
 
 
@@ -64,26 +65,27 @@ def indent(nb: int = 1) -> None:
 
 
 # Connect to the MySQL database with Peewee ORM
-db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
-db.connect()
+DB.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
+DB.connect()
 
 
 # Output information about ongoing export & write section to output destination
-def write_section(index: int, total: int, section: Section) -> str:
+def write_section(index: int, total: int, section: Rubrique) -> str:
+    color = G  # Associate sections to green
     # Print the name of the exported section & number of remaining sections
     style(f"{index + 1}. ", BO)
-    highlight(section.title, *unknown_chars(section.title))
-    style(f" {total-index-1}", BO, G)
+    highlight(section.titre, *unknown_chars(section.titre))
+    style(f" {total-index-1}", BO, color)
     style(f" section{s(total-index)} left")
     # Define the section’s path (directory) & create directory(ies) if needed
-    sectiondir: str = config.output_dir + "/" + section.get_slug()
+    sectiondir: str = config.output_dir + "/" + section.slug()
     makedirs(sectiondir, exist_ok=True)
     # Define the section filename & write the index at that filename
-    sectionpath: str = sectiondir + "/" + section.get_filename()
+    sectionpath: str = sectiondir + "/" + section.filename()
     with open(sectionpath, "w") as f:
-        f.write(section.get_content())
+        f.write(section.content())
     # Print export location when finished exporting
-    style(" -> ", BO, G)
+    style(" -> ", BO, color)
     print(sectionpath)
     # Return the first "limit" articles of section
     return sectiondir
@@ -91,30 +93,31 @@ def write_section(index: int, total: int, section: Section) -> str:
 
 # Output information about ongoing export & write article to output destination
 def write_article(index: int, total: int, article: Article, sectiondir: str) -> str:
+    color = Y  # Associate articles to yellow
     # Print the remaining number of articles to export every 100 articles
     if index % 100 == 0:
         indent()
         print("Exporting", end="")
-        style(f" {total-index}", BO, Y)
+        style(f" {total-index}", BO, color)
         print(" SPIP", end="")
         style(f" article{s(total-index)}")
         print(" to Markdown & YAML files")
     # Print the title of the article being exported
     style(
         f"  {index + 1}. "
-        + ("EMPTY " if len(article.text) < 1 else "")
+        + ("EMPTY " if len(article.texte) < 1 else "")
         + f"{article.lang} "
     )
-    highlight(article.title, *unknown_chars(article.title))
+    highlight(article.titre, *unknown_chars(article.titre))
     # Define the full article path & create directory(ies) if needed
-    articledir: str = sectiondir + "/" + article.get_slug()
+    articledir: str = sectiondir + "/" + article.slug()
     makedirs(articledir, exist_ok=True)
     # Define the article filename & write the article at the filename
-    articlepath: str = articledir + "/" + article.get_filename()
+    articlepath: str = articledir + "/" + article.filename()
     with open(articlepath, "w") as f:
-        f.write(article.get_content())
+        f.write(article.content())
     # Print export location when finished exporting
-    style(" -> ", BO, B)
+    style(" -> ", BO, color)
     print(articlepath)
     return articledir
 
@@ -123,34 +126,35 @@ def write_article(index: int, total: int, article: Article, sectiondir: str) ->
 def write_document(
     index: int, total: int, document: Document, objectdir: str, indent_depth: int = 1
 ) -> None:
+    color = B  # Associate documents to blue
     if index % 100 == 0:
         indent(indent_depth)
         print("Exporting", end="")
-        style(f" {total-index}", BO, B)
+        style(f" {total-index}", BO, color)
         style(f" document{s(total-index)}\n")
     # Print the name of the file with a counter
     indent(indent_depth)
     style(f"{index + 1}. {document.media} ")
-    if len(document.title) > 0:
-        highlight(document.title + " ", *unknown_chars(document.title))
+    if len(document.titre) > 0:
+        highlight(document.titre + " ", *unknown_chars(document.titre))
     style("at ")
-    print(document.file, end="")
+    print(document.fichier, end="")
     # Define document path
-    documentpath: str = expanduser(config.data_dir + "/" + document.file)
+    documentpath: str = expanduser(config.data_dir + "/" + document.fichier)
     # Copy the document from it’s SPIP location to the new location
     try:
-        copyfile(documentpath, objectdir + "/" + document.get_slug())
+        copyfile(documentpath, objectdir + "/" + document.slug())
     except FileNotFoundError:
         style(" -> NOT FOUND!\n", BO, R)
     else:
         # Print the outputted file’s path when copied the file
-        style(" ->", BO, B)
-        print(f" {objectdir}/{document.get_slug()}")
+        style(" ->", BO, color)
+        print(f" {objectdir}/{document.slug()}")
 
 
 # Return true if an article field contains an unknown character
 def has_unknown_chars(article: Article) -> bool:
-    if len(get_unknown_chars(article.text)) > 0:
+    if len(unknown_chars_context(article.texte)) > 0:
         return True
     return False
 
@@ -159,13 +163,13 @@ def has_unknown_chars(article: Article) -> bool:
 def warn_unknown_chars(article: Article) -> None:
     # Print the title of the article in which there is unknown characters
     # & the number of them
-    unknown_chars_apparitions: list[str] = get_unknown_chars(article.text)
+    unknown_chars_apparitions: list[str] = unknown_chars_context(article.texte)
     nb: int = len(unknown_chars_apparitions)
     s: str = "s" if nb > 1 else ""
     style(f"{nb}")
     print(f" unknown character{s} in", end="")
     style(f" {article.lang} ")
-    highlight(article.title, *unknown_chars(article.title))
+    highlight(article.titre, *unknown_chars(article.titre))
     print()  # Break line
     # Print the context in which the unknown characters are found
     for text in unknown_chars_apparitions:
@@ -197,7 +201,7 @@ if __name__ == "__main__":
     unknown_chars_articles: list[Article] = []
 
     # Get sections with an eventual maximum
-    sections = Sections(max_sections_export)
+    sections = get_sections(max_sections_export)
     nb_sections_export: int = len(sections)
 
     # Loop among sections & export them
@@ -205,11 +209,11 @@ if __name__ == "__main__":
         # Write the section & store its articles
         sectiondir = write_section(i, nb_sections_export, section)
         # Loop over section’s related files (images …)
-        documents = section.get_documents()
+        documents = get_documents(section.id_rubrique)
         for i, document in enumerate(documents):
             write_document(i, len(documents), document, sectiondir)
         # Loop over section’s articles
-        articles = section.get_articles(max_articles_export)
+        articles = get_articles(section.id_rubrique, (max_articles_export))
         for i, article in enumerate(articles):
             articledir = write_article(i, len(articles), article, sectiondir)
             # Add article to unknown_chars_articles if needed
@@ -218,7 +222,7 @@ if __name__ == "__main__":
             # Decrement export limit
             max_articles_export -= 1
             # Loop over article’s related files (images …)
-            documents = section.get_documents()
+            documents = get_documents(article.id_article)
             for i, document in enumerate(documents):
                 write_document(i, len(documents), document, sectiondir, 2)
         # Break line when finished exporting the section
@@ -229,4 +233,4 @@ if __name__ == "__main__":
     for article in unknown_chars_articles:
         warn_unknown_chars(article)
 
-    db.close()  # Close the connection with the database
+    DB.close()  # Close the connection with the database
diff --git a/spip2md/spipobjects.py b/spip2md/spipobjects.py
new file mode 100644
index 0000000..d6a8805
--- /dev/null
+++ b/spip2md/spipobjects.py
@@ -0,0 +1,212 @@
+from os.path import basename, splitext
+
+from peewee import ModelSelect
+from slugify import slugify
+from yaml import dump
+
+from converters import convert
+from database import (
+    SpipArticles,
+    SpipAuteurs,
+    SpipAuteursLiens,
+    SpipDocuments,
+    SpipDocumentsLiens,
+    SpipRubriques,
+)
+
+EXPORTTYPE: str = "md"
+
+# Convert images & files links
+# text: str = convert_documents(
+#     self.texte,
+#     [(d.id, d.titre, d.slug()) for d in self.documents()],
+# )
+
+
+class Document(SpipDocuments):  # SpipDocuments row with cleaned metadata & a slug helper
+    class Meta:
+        table_name: str = "spip_documents"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.titre: str = convert(self.titre, True)  # Markdown conversion + meta cleaning
+        self.descriptif: str = convert(self.descriptif, True)
+        self.statut: str = "false" if self.statut == "publie" else "true"  # "false" once published
+
+    def slug(self, date: bool = False) -> str:  # Slugified file name, extension kept as is
+        name_type: tuple[str, str] = splitext(basename(self.fichier))
+        return (
+            slugify((self.date_publication + "-" if date else "") + name_type[0])  # NOTE(review): assumes date_publication is a str — confirm
+            + name_type[1]
+        )
+
+
+class Article(SpipArticles):  # SpipArticles row converted to Markdown, with export helpers
+    class Meta:
+        table_name: str = "spip_articles"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.titre: str = convert(self.titre, True)
+        self.descriptif: str = convert(self.descriptif, True)
+        self.texte: str = convert(self.texte)  # Convert SPIP to Markdown
+        self.statut: str = "false" if self.statut == "publie" else "true"  # Emitted as "draft"
+        self.langue_choisie: str = "true" if self.langue_choisie == "oui" else "false"
+        self.extra: str = convert(self.extra)  # Probably unused
+        # Article specific
+        self.surtitle: str = convert(self.surtitre, True)  # Probably unused
+        self.subtitle: str = convert(self.soustitre, True)  # Probably unused
+        self.caption: str = convert(self.chapo)  # Probably unused
+        self.ps: str = convert(self.ps)  # Probably unused
+        self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false"
+
+    def slug(self, date: bool = False) -> str:  # Slugified title, optionally date-prefixed
+        return slugify((self.date + "-" if date else "") + self.titre)
+
+    def filename(self) -> str:  # "index.<lang>.md"
+        return "index" + "." + self.lang + "." + EXPORTTYPE
+
+    def frontmatter(self) -> str:  # YAML metadata, without the "---" delimiters
+        return dump(
+            {
+                "lang": self.lang,
+                "translationKey": self.id_trad,
+                "title": self.titre,
+                "publishDate": self.date,
+                "lastmod": self.maj,
+                "draft": self.statut,
+                "description": self.descriptif,
+                # Debugging
+                "spip_id": self.id_article,
+                "spip_id_secteur": self.id_secteur,
+                # Article specific
+                "surtitle": self.surtitle,
+                "subtitle": self.subtitle,
+                "date": self.date_redac,
+                "authors": [author.nom for author in self.authors()],
+                # Debugging
+                "spip_id_rubrique": self.id_rubrique,
+                "spip_chapo": self.caption,
+            },
+            allow_unicode=True,
+        )
+
+    def body(self) -> str:  # Markdown body: title, text, then the optional extra sections
+        body: str = ""
+        # Add the title as a Markdown h1
+        if len(self.titre) > 0:
+            body += "\n\n# " + self.titre
+        # If there is a text, add the text preceded by two line breaks
+        if len(self.texte) > 0:
+            # HTML tags were already removed by convert(), so append the text as is
+            body += "\n\n" + self.texte
+        # Same with an "extra" section
+        if len(self.extra) > 0:
+            body += "\n\n# EXTRA\n\n" + self.extra
+        # If there is a caption, add the caption followed by a hr
+        if hasattr(self, "caption") and len(self.caption) > 0:
+            body += "\n\n" + self.caption + "\n\n***"
+        # PS
+        if hasattr(self, "ps") and len(self.ps) > 0:
+            body += "\n\n# POST-SCRIPTUM\n\n" + self.ps
+        # Microblog is never passed through convert(), so guard against a None value
+        if getattr(self, "microblog", None):
+            body += "\n\n# MICROBLOGGING\n\n" + self.microblog
+        return body
+
+    def content(self) -> str:
+        # Return the final article text
+        return "---\n" + self.frontmatter() + "---" + self.body()
+
+    def authors(self) -> list[SpipAuteurs]:  # Authors linked to this article's id
+        return (
+            SpipAuteurs.select()
+            .join(
+                SpipAuteursLiens,
+                on=(SpipAuteurs.id_auteur == SpipAuteursLiens.id_auteur),
+            )
+            .where(SpipAuteursLiens.id_objet == self.id_article)
+        )
+
+
+class Rubrique(SpipRubriques):  # SpipRubriques row (section) converted to Markdown
+    class Meta:
+        table_name: str = "spip_rubriques"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.titre: str = convert(self.titre, True)
+        self.descriptif: str = convert(self.descriptif, True)
+        self.texte: str = convert(self.texte)  # Convert SPIP to Markdown
+        self.statut: str = "false" if self.statut == "publie" else "true"  # Emitted as "draft"
+        self.langue_choisie: str = "true" if self.langue_choisie == "oui" else "false"
+        self.extra: str = convert(self.extra)  # Probably unused
+
+    def slug(self, date: bool = False) -> str:  # Slugified title, optionally date-prefixed
+        return slugify((self.date + "-" if date else "") + self.titre)
+
+    def filename(self) -> str:  # "_index.<lang>.md": keep the "_" prefix sections had before
+        return "_index" + "." + self.lang + "." + EXPORTTYPE
+
+    def frontmatter(self) -> str:  # YAML metadata, without the "---" delimiters
+        return dump(
+            {
+                "lang": self.lang,
+                "translationKey": self.id_trad,
+                "title": self.titre,
+                "publishDate": self.date,
+                "lastmod": self.maj,
+                "draft": self.statut,
+                "description": self.descriptif,
+                # Debugging
+                "spip_id": self.id_rubrique,
+                "spip_id_secteur": self.id_secteur,
+            },
+            allow_unicode=True,
+        )
+
+    def body(self) -> str:  # Markdown body: title, text, then the optional extra section
+        body: str = ""
+        # Add the title as a Markdown h1
+        if len(self.titre) > 0:
+            body += "\n\n# " + self.titre
+        # If there is a text, add the text preceded by two line breaks
+        if len(self.texte) > 0:
+            # HTML tags were already removed by convert(), so append the text as is
+            body += "\n\n" + self.texte
+        # Same with an "extra" section
+        if len(self.extra) > 0:
+            body += "\n\n# EXTRA\n\n" + self.extra
+        return body
+
+    def content(self) -> str:
+        # Return the final section text
+        return "---\n" + self.frontmatter() + "---" + self.body()
+
+
+# Query the DB for sections, latest "date" first, returning at most "limit" rows
+def get_sections(limit: int = 10**6) -> ModelSelect:
+    return Rubrique.select().order_by(Rubrique.date.desc()).limit(limit)
+
+
+# Query the DB for the articles of section "section_id", latest "date" first, at most "limit" rows
+def get_articles(section_id: int, limit: int = 10**6) -> ModelSelect:
+    return (
+        Article.select()
+        .where(Article.id_rubrique == section_id)
+        .order_by(Article.date.desc())
+        .limit(limit)
+    )
+
+
+# Query the DB for the documents linked to the object of id "object_id", at most "limit" rows
+def get_documents(object_id: int, limit: int = 10**6) -> ModelSelect:
+    return (
+        Document.select()
+        .join(
+            SpipDocumentsLiens,
+            on=(Document.id_document == SpipDocumentsLiens.id_document),
+        )
+        .where(SpipDocumentsLiens.id_objet == object_id)  # NOTE(review): id only, callers pass both section & article ids — confirm they cannot collide
+        .limit(limit)
+    )