refactor with inheritance between Article & Section

2023-05-25 10:30:18 +02:00 · 2023-05-25 10:30:18 +02:00 · 7e3680d282
commit 7e3680d282
parent f67cddd92c
2 changed files with 79 additions and 106 deletions
--- a/spip2md/config.py
+++ b/spip2md/config.py
@ -24,6 +24,7 @@ class Configuration:
    data_dir: str = "data"
    clear_output: bool = False
    prepend_h1: bool = True
    export_filetype: str = "md"
    def __init__(self, config_file: Optional[str] = None):
        if config_file is not None:
--- a/spip2md/spipobjects.py
+++ b/spip2md/spipobjects.py
@ -1,5 +1,6 @@
 from os.path import basename, splitext
-from re import I, compile
+from re import I, compile, finditer
 from typing import Any
 from peewee import ModelSelect
 from slugify import slugify
@ -35,27 +36,20 @@ class Document(SpipDocuments):
        )
-EXPORTTYPE: str = "md"
+class SpipObject:
-
+    id: int
 class Article(SpipArticles):
    class Meta:
        table_name: str = "spip_articles"
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Common fields that need conversions
        self.titre: str = convert(self.titre, True)
        self.descriptif: str = convert(self.descriptif, True)
        self.texte: str = convert(self.texte)  # Convert SPIP to Markdown
        self.statut: str = "false" if self.statut == "publie" else "true"
        self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true"
        self.extra: str = convert(self.extra)  # Probably unused
-        # Article specific
+        # Define file prefix (need to be changed later)
-        self.surtitre: str = convert(self.surtitre, True)  # Probably unused
+        self.prefix = "index"
        self.soustitre: str = convert(self.soustitre, True)  # Probably unused
        self.chapo: str = convert(self.chapo)  # Probably unused
        self.ps: str = convert(self.ps)  # Probably unused
        self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false"
    def documents(self) -> ModelSelect:
        documents = (
@ -64,7 +58,7 @@ class Article(SpipArticles):
                SpipDocumentsLiens,
                on=(Document.id_document == SpipDocumentsLiens.id_document),
            )
-            .where(SpipDocumentsLiens.id_objet == self.id_article)
+            .where(SpipDocumentsLiens.id_objet == self.id)
        )
        for d in documents:
            self.texte = link_document(self.texte, d.id_document, d.titre, d.slug())
@ -76,11 +70,13 @@ class Article(SpipArticles):
        return slugify((self.date + "-" if date else "") + self.titre)
    def filename(self) -> str:
-        return "index" + "." + self.lang + "." + EXPORTTYPE
+        return self.prefix + "." + self.lang + "." + config.export_filetype
    def frontmatter(self) -> str:
-        return dump(
+        raise NotImplementedError("Subclasses must implement 'frontmatter' method.")
-            {
+
    def common_frontmatter(self) -> dict[str, Any]:
        return {
            "lang": self.lang,
            "translationKey": self.id_trad,
            "title": self.titre,
@ -89,19 +85,9 @@ class Article(SpipArticles):
            "draft": self.statut,
            "description": self.descriptif,
            # Debugging
                "spip_id": self.id_article,
            "spip_id_secteur": self.id_secteur,
-                # Article specific
+            "spip_id": self.id,
-                "summary": self.chapo,
+        }
                "surtitle": self.surtitre,
                "subtitle": self.soustitre,
                "date": self.date_redac,
                "authors": [author.nom for author in self.authors()],
                # Debugging
                "spip_id_rubrique": self.id_rubrique,
            },
            allow_unicode=True,
        )
    def body(self) -> str:
        body: str = ""
@ -115,21 +101,57 @@ class Article(SpipArticles):
        # Same with an "extra" section
        if len(self.extra) > 0:
            body += "\n\n# EXTRA\n\n" + self.extra
        # If there is a caption, add the caption followed by a hr
        if hasattr(self, "caption") and len(self.caption) > 0:
            body += "\n\n" + self.caption + "\n\n***"
        # PS
        if hasattr(self, "ps") and len(self.ps) > 0:
            body += "\n\n# POST-SCRIPTUM\n\n" + self.ps
        # Microblog
        if hasattr(self, "microblog") and len(self.microblog) > 0:
            body += "\n\n# MICROBLOGGING\n\n" + self.microblog
        return body
    def content(self) -> str:
        # Return the final article text
        return "---\n" + self.frontmatter() + "---" + self.body()
 class Article(SpipObject, SpipArticles):
    class Meta:
        table_name: str = "spip_articles"
    def __init__(self, *args, **kwargs):
        """Initialize through the parent classes, then convert article fields.

        All positional and keyword arguments are forwarded unchanged to the
        parent initializer.
        """
        super().__init__(*args, **kwargs)
        # More conversions needed for articles
        # These two are converted with convert()'s second argument set to True
        for field_name in ("surtitre", "soustitre"):  # Probably unused
            setattr(self, field_name, convert(getattr(self, field_name), True))
        # These two use convert()'s default second argument
        for field_name in ("chapo", "ps"):  # Probably unused
            setattr(self, field_name, convert(getattr(self, field_name)))
        # "oui" becomes the string "true"; any other value becomes "false"
        if self.accepter_forum == "oui":
            self.accepter_forum = "true"
        else:
            self.accepter_forum = "false"
        # ID
        self.id = self.id_article
    def frontmatter(self) -> str:
        """Serialize the article's metadata with ``dump``.

        The mapping starts as a copy of the parent's ``common_frontmatter()``
        result; the article-only keys are then layered on top, so an article
        value replaces a common one if both use the same key.
        """
        metadata = dict(super().common_frontmatter())
        metadata.update(
            {
                # Article specific
                "summary": self.chapo,
                "surtitle": self.surtitre,
                "subtitle": self.soustitre,
                "date": self.date_redac,
                "authors": [writer.nom for writer in self.authors()],
                # Debugging
                "spip_id_rubrique": self.id_rubrique,
            }
        )
        return dump(metadata, allow_unicode=True)
    def body(self) -> str:
        """Extend the parent's body with the article-only sections.

        After the text returned by the parent's ``body()``, each non-empty
        field is appended in this order: ``chapo`` followed by a ``***`` rule,
        ``ps`` under a ``# POST-SCRIPTUM`` heading, and ``microblog`` under a
        ``# MICROBLOGGING`` heading.
        """
        sections = [super().body()]
        # chapo (the field exported as "summary"), closed by a horizontal rule
        if len(self.chapo) > 0:
            sections.append("\n\n" + self.chapo + "\n\n***")
        # PS
        if len(self.ps) > 0:
            sections.append("\n\n# POST-SCRIPTUM\n\n" + self.ps)
        # Microblog
        if len(self.microblog) > 0:
            sections.append("\n\n# MICROBLOGGING\n\n" + self.microblog)
        return "".join(sections)
    def authors(self) -> list[SpipAuteurs]:
        return (
            SpipAuteurs.select()
@ -151,11 +173,8 @@ def get_articles(section_id: int, limit: int = 10**6) -> ModelSelect:
    )
 ARTICLE_LINK = compile(r"\[(.*?)]\((?:art|article)([0-9]+)\)", I)
 def link_articles(text: str):
-    for match in ARTICLE_LINK.finditer(text):
+    for match in finditer(r"\[(.*?)]\((?:art|article)([0-9]+)\)", text):
        article = Article.get(Article.id_article == match.group(2))
        if len(match.group(1)) > 0:
            title: str = match.group(1)
@ -167,75 +186,28 @@ def link_articles(text: str):
    return text
-class Rubrique(SpipRubriques):
+class Rubrique(SpipObject, SpipRubriques):
    class Meta:
        table_name: str = "spip_rubriques"
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
-        self.titre: str = convert(self.titre, True)
+        # ID
-        self.descriptif: str = convert(self.descriptif, True)
+        self.id = self.id_rubrique
-        self.texte: str = convert(self.texte)  # Convert SPIP to Markdown
+        # File prefix
-        self.statut: str = "false" if self.statut == "publie" else "true"
+        self.prefix = "_index"
        self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true"
        self.extra: str = convert(self.extra)  # Probably unused
    def documents(self) -> ModelSelect:
        """Select the documents linked to this section and rewrite its text.

        As a side effect, ``self.texte`` is passed through ``link_document``
        once per selected document and finally through ``link_articles``.

        Returns:
            The query selecting the linked documents.
        """
        same_document = Document.id_document == SpipDocumentsLiens.id_document
        # NOTE(review): the filter matches on id_objet alone; if the link table
        # also stores the type of the linked object, rows of other object kinds
        # sharing this id would match too — confirm against the schema.
        linked_to_section = SpipDocumentsLiens.id_objet == self.id_rubrique
        attached = (
            Document.select()
            .join(SpipDocumentsLiens, on=same_document)
            .where(linked_to_section)
        )
        for doc in attached:
            self.texte = link_document(
                self.texte, doc.id_document, doc.titre, doc.slug()
            )
        # Internal (articles) links
        self.texte = link_articles(self.texte)
        return attached
    def slug(self, date: bool = False) -> str:
        """Return the slugified title, optionally prefixed with the date.

        Args:
            date: when truthy, ``self.date`` and a ``-`` separator are put in
                front of the title before it is passed to ``slugify``.
        """
        prefix = ""
        if date:
            prefix = self.date + "-"
        return slugify(prefix + self.titre)
    def filename(self) -> str:
        """Return the section's file name, ``_index.<lang>.<filetype>``.

        The extension is read from ``config.export_filetype`` instead of the
        module-level ``EXPORTTYPE`` constant: this change set deletes that
        constant, and the other ``filename`` implementation in this module
        already reads the configured value, so both now agree.
        """
        return ".".join(("_index", self.lang, config.export_filetype))
    def frontmatter(self) -> str:
        return dump(
            {
-                "lang": self.lang,
+                **super().common_frontmatter(),
                "translationKey": self.id_trad,
                "title": self.titre,
                "publishDate": self.date,
                "lastmod": self.maj,
                "draft": self.statut,
                "description": self.descriptif,
                # Debugging
-                "spip_id": self.id_rubrique,
+                "spip_id_parent": self.id_parent,
-                "spip_id_secteur": self.id_secteur,
+                "spip_profondeur": self.profondeur,
            },
            allow_unicode=True,
        )
    def body(self) -> str:
        """Build the Markdown text placed after the front matter.

        Non-empty parts are appended in order, each starting with two line
        breaks: the title as an h1 (only when ``config.prepend_h1`` is
        truthy), the text, and the "extra" field under an ``# EXTRA`` heading.
        An object with nothing to show yields an empty string.
        """
        chunks: list[str] = []
        # Title as a Markdown h1; the config flag is only read for a non-empty title
        if len(self.titre) > 0 and config.prepend_h1:
            chunks.append("\n\n# " + self.titre)
        # Main text
        if len(self.texte) > 0:
            chunks.append("\n\n" + self.texte)
        # "extra" section
        if len(self.extra) > 0:
            chunks.append("\n\n# EXTRA\n\n" + self.extra)
        return "".join(chunks)
    def content(self) -> str:
        # Return the final article text
        return "---\n" + self.frontmatter() + "---" + self.body()
 # Query the DB to retrieve all sections sorted by publication date
 def get_sections(limit: int = 10**6) -> ModelSelect: