refactor with inheritance between Article & Section

2023-05-25 10:30:18 +02:00 · 2023-05-25 10:30:18 +02:00 · 7e3680d282
commit 7e3680d282
parent f67cddd92c
2 changed files with 79 additions and 106 deletions
--- a/spip2md/config.py
+++ b/spip2md/config.py
@ -24,6 +24,7 @@ class Configuration:
    data_dir: str = "data"
    clear_output: bool = False
    prepend_h1: bool = True
+    export_filetype: str = "md"

    def __init__(self, config_file: Optional[str] = None):
        if config_file is not None:
--- a/spip2md/spipobjects.py
+++ b/spip2md/spipobjects.py
@ -1,5 +1,6 @@
 from os.path import basename, splitext
-from re import I, compile
+from re import I, compile, finditer
+from typing import Any

 from peewee import ModelSelect
 from slugify import slugify
@ -35,27 +36,20 @@ class Document(SpipDocuments):
        )


-EXPORTTYPE: str = "md"
-
-
-class Article(SpipArticles):
-    class Meta:
-        table_name: str = "spip_articles"
+class SpipObject:
+    id: int

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
+        # Common fields that need conversions
        self.titre: str = convert(self.titre, True)
        self.descriptif: str = convert(self.descriptif, True)
        self.texte: str = convert(self.texte)  # Convert SPIP to Markdown
        self.statut: str = "false" if self.statut == "publie" else "true"
        self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true"
        self.extra: str = convert(self.extra)  # Probably unused
-        # Article specific
-        self.surtitre: str = convert(self.surtitre, True)  # Probably unused
-        self.soustitre: str = convert(self.soustitre, True)  # Probably unused
-        self.chapo: str = convert(self.chapo)  # Probably unused
-        self.ps: str = convert(self.ps)  # Probably unused
-        self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false"
+        # Define file prefix (need to be changed later)
+        self.prefix = "index"

    def documents(self) -> ModelSelect:
        documents = (
@ -64,7 +58,7 @@ class Article(SpipArticles):
                SpipDocumentsLiens,
                on=(Document.id_document == SpipDocumentsLiens.id_document),
            )
-            .where(SpipDocumentsLiens.id_objet == self.id_article)
+            .where(SpipDocumentsLiens.id_objet == self.id)
        )
        for d in documents:
            self.texte = link_document(self.texte, d.id_document, d.titre, d.slug())
@ -76,11 +70,13 @@ class Article(SpipArticles):
        return slugify((self.date + "-" if date else "") + self.titre)

    def filename(self) -> str:
-        return "index" + "." + self.lang + "." + EXPORTTYPE
+        return self.prefix + "." + self.lang + "." + config.export_filetype

    def frontmatter(self) -> str:
-        return dump(
-            {
+        raise NotImplementedError("Subclasses must implement 'frontmatter' method.")
+
+    def common_frontmatter(self) -> dict[str, Any]:
+        return {
            "lang": self.lang,
            "translationKey": self.id_trad,
            "title": self.titre,
@ -89,19 +85,9 @@ class Article(SpipArticles):
            "draft": self.statut,
            "description": self.descriptif,
            # Debugging
-                "spip_id": self.id_article,
            "spip_id_secteur": self.id_secteur,
-                # Article specific
-                "summary": self.chapo,
-                "surtitle": self.surtitre,
-                "subtitle": self.soustitre,
-                "date": self.date_redac,
-                "authors": [author.nom for author in self.authors()],
-                # Debugging
-                "spip_id_rubrique": self.id_rubrique,
-            },
-            allow_unicode=True,
-        )
+            "spip_id": self.id,
+        }

    def body(self) -> str:
        body: str = ""
@ -115,21 +101,57 @@ class Article(SpipArticles):
        # Same with an "extra" section
        if len(self.extra) > 0:
            body += "\n\n# EXTRA\n\n" + self.extra
-        # If there is a caption, add the caption followed by a hr
-        if hasattr(self, "caption") and len(self.caption) > 0:
-            body += "\n\n" + self.caption + "\n\n***"
-        # PS
-        if hasattr(self, "ps") and len(self.ps) > 0:
-            body += "\n\n# POST-SCRIPTUM\n\n" + self.ps
-        # Microblog
-        if hasattr(self, "microblog") and len(self.microblog) > 0:
-            body += "\n\n# MICROBLOGGING\n\n" + self.microblog
        return body

    def content(self) -> str:
        # Return the final article text
        return "---\n" + self.frontmatter() + "---" + self.body()

+
+class Article(SpipObject, SpipArticles):
+    class Meta:
+        table_name: str = "spip_articles"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # More conversions needed for articles
+        self.surtitre: str = convert(self.surtitre, True)  # Probably unused
+        self.soustitre: str = convert(self.soustitre, True)  # Probably unused
+        self.chapo: str = convert(self.chapo)  # Probably unused
+        self.ps: str = convert(self.ps)  # Probably unused
+        self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false"
+        # ID
+        self.id = self.id_article
+
+    def frontmatter(self) -> str:
+        return dump(
+            {
+                **super().common_frontmatter(),
+                # Article specific
+                "summary": self.chapo,
+                "surtitle": self.surtitre,
+                "subtitle": self.soustitre,
+                "date": self.date_redac,
+                "authors": [author.nom for author in self.authors()],
+                # Debugging
+                "spip_id_rubrique": self.id_rubrique,
+            },
+            allow_unicode=True,
+        )
+
+    def body(self) -> str:
+        body: str = super().body()
+        # If there is a caption, add the caption followed by a hr
+        if len(self.chapo) > 0:
+            body += "\n\n" + self.chapo + "\n\n***"
+        # PS
+        if len(self.ps) > 0:
+            body += "\n\n# POST-SCRIPTUM\n\n" + self.ps
+        # Microblog
+        if len(self.microblog) > 0:
+            body += "\n\n# MICROBLOGGING\n\n" + self.microblog
+        return body
+
    def authors(self) -> list[SpipAuteurs]:
        return (
            SpipAuteurs.select()
@ -151,11 +173,8 @@ def get_articles(section_id: int, limit: int = 10**6) -> ModelSelect:
    )


-ARTICLE_LINK = compile(r"\[(.*?)]\((?:art|article)([0-9]+)\)", I)
-
-
 def link_articles(text: str):
-    for match in ARTICLE_LINK.finditer(text):
+    for match in finditer(r"\[(.*?)]\((?:art|article)([0-9]+)\)", text):
        article = Article.get(Article.id_article == match.group(2))
        if len(match.group(1)) > 0:
            title: str = match.group(1)
@ -167,75 +186,28 @@ def link_articles(text: str):
    return text


-class Rubrique(SpipRubriques):
+class Rubrique(SpipObject, SpipRubriques):
    class Meta:
        table_name: str = "spip_rubriques"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
-        self.titre: str = convert(self.titre, True)
-        self.descriptif: str = convert(self.descriptif, True)
-        self.texte: str = convert(self.texte)  # Convert SPIP to Markdown
-        self.statut: str = "false" if self.statut == "publie" else "true"
-        self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true"
-        self.extra: str = convert(self.extra)  # Probably unused
-
-    def documents(self) -> ModelSelect:
-        documents = (
-            Document.select()
-            .join(
-                SpipDocumentsLiens,
-                on=(Document.id_document == SpipDocumentsLiens.id_document),
-            )
-            .where(SpipDocumentsLiens.id_objet == self.id_rubrique)
-        )
-        for d in documents:
-            self.texte = link_document(self.texte, d.id_document, d.titre, d.slug())
-        # Internal (articles) links
-        self.texte = link_articles(self.texte)
-        return documents
-
-    def slug(self, date: bool = False) -> str:
-        return slugify((self.date + "-" if date else "") + self.titre)
-
-    def filename(self) -> str:
-        return "_index" + "." + self.lang + "." + EXPORTTYPE
+        # ID
+        self.id = self.id_rubrique
+        # File prefix
+        self.prefix = "_index"

    def frontmatter(self) -> str:
        return dump(
            {
-                "lang": self.lang,
-                "translationKey": self.id_trad,
-                "title": self.titre,
-                "publishDate": self.date,
-                "lastmod": self.maj,
-                "draft": self.statut,
-                "description": self.descriptif,
+                **super().common_frontmatter(),
                # Debugging
-                "spip_id": self.id_rubrique,
-                "spip_id_secteur": self.id_secteur,
+                "spip_id_parent": self.id_parent,
+                "spip_profondeur": self.profondeur,
            },
            allow_unicode=True,
        )

-    def body(self) -> str:
-        body: str = ""
-        # Add the title as a Markdown h1
-        if len(self.titre) > 0 and config.prepend_h1:
-            body += "\n\n# " + self.titre
-        # If there is a text, add the text preceded by two line breaks
-        if len(self.texte) > 0:
-            # Remove remaining HTML after & append to body
-            body += "\n\n" + self.texte
-        # Same with an "extra" section
-        if len(self.extra) > 0:
-            body += "\n\n# EXTRA\n\n" + self.extra
-        return body
-
-    def content(self) -> str:
-        # Return the final article text
-        return "---\n" + self.frontmatter() + "---" + self.body()
-

 # Query the DB to retrieve all sections sorted by publication date
 def get_sections(limit: int = 10**6) -> ModelSelect: