From 7e3680d282999fb6a141203f94c07d74e50b48fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guilhem=20Faur=C3=A9?= <pro@gfaure.eu>
Date: Thu, 25 May 2023 10:30:18 +0200
Subject: [PATCH] refactor with inheritance between Article & Section

---
 spip2md/config.py      |   1 +
 spip2md/spipobjects.py | 184 +++++++++++++++++------------------------
 2 files changed, 79 insertions(+), 106 deletions(-)

diff --git a/spip2md/config.py b/spip2md/config.py
index d84af5d..c5ad8f9 100644
--- a/spip2md/config.py
+++ b/spip2md/config.py
@@ -24,6 +24,7 @@ class Configuration:
     data_dir: str = "data"
     clear_output: bool = False
     prepend_h1: bool = True
+    export_filetype: str = "md"
 
     def __init__(self, config_file: Optional[str] = None):
         if config_file is not None:
diff --git a/spip2md/spipobjects.py b/spip2md/spipobjects.py
index b73e528..eb2093e 100644
--- a/spip2md/spipobjects.py
+++ b/spip2md/spipobjects.py
@@ -1,5 +1,6 @@
 from os.path import basename, splitext
-from re import I, compile
+from re import I, compile, finditer
+from typing import Any
 
 from peewee import ModelSelect
 from slugify import slugify
@@ -35,27 +36,20 @@ class Document(SpipDocuments):
         )
 
 
-EXPORTTYPE: str = "md"
-
-
-class Article(SpipArticles):
-    class Meta:
-        table_name: str = "spip_articles"
+class SpipObject:
+    id: int
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
+        # Common fields that need conversions
         self.titre: str = convert(self.titre, True)
         self.descriptif: str = convert(self.descriptif, True)
         self.texte: str = convert(self.texte)  # Convert SPIP to Markdown
         self.statut: str = "false" if self.statut == "publie" else "true"
         self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true"
         self.extra: str = convert(self.extra)  # Probably unused
-        # Article specific
-        self.surtitre: str = convert(self.surtitre, True)  # Probably unused
-        self.soustitre: str = convert(self.soustitre, True)  # Probably unused
-        self.chapo: str = convert(self.chapo)  # Probably unused
-        self.ps: str = convert(self.ps)  # Probably unused
-        self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false"
+        # Default file prefix; subclasses may override it (Rubrique uses "_index")
+        self.prefix = "index"
 
     def documents(self) -> ModelSelect:
         documents = (
@@ -64,7 +58,7 @@ class Article(SpipArticles):
                 SpipDocumentsLiens,
                 on=(Document.id_document == SpipDocumentsLiens.id_document),
             )
-            .where(SpipDocumentsLiens.id_objet == self.id_article)
+            .where(SpipDocumentsLiens.id_objet == self.id)
         )
         for d in documents:
             self.texte = link_document(self.texte, d.id_document, d.titre, d.slug())
@@ -76,32 +70,24 @@ class Article(SpipArticles):
         return slugify((self.date + "-" if date else "") + self.titre)
 
     def filename(self) -> str:
-        return "index" + "." + self.lang + "." + EXPORTTYPE
+        return self.prefix + "." + self.lang + "." + config.export_filetype
 
     def frontmatter(self) -> str:
-        return dump(
-            {
-                "lang": self.lang,
-                "translationKey": self.id_trad,
-                "title": self.titre,
-                "publishDate": self.date,
-                "lastmod": self.maj,
-                "draft": self.statut,
-                "description": self.descriptif,
-                # Debugging
-                "spip_id": self.id_article,
-                "spip_id_secteur": self.id_secteur,
-                # Article specific
-                "summary": self.chapo,
-                "surtitle": self.surtitre,
-                "subtitle": self.soustitre,
-                "date": self.date_redac,
-                "authors": [author.nom for author in self.authors()],
-                # Debugging
-                "spip_id_rubrique": self.id_rubrique,
-            },
-            allow_unicode=True,
-        )
+        raise NotImplementedError("Subclasses must implement 'frontmatter' method.")
+
+    def common_frontmatter(self) -> dict[str, Any]:
+        return {
+            "lang": self.lang,
+            "translationKey": self.id_trad,
+            "title": self.titre,
+            "publishDate": self.date,
+            "lastmod": self.maj,
+            "draft": self.statut,
+            "description": self.descriptif,
+            # Debugging
+            "spip_id_secteur": self.id_secteur,
+            "spip_id": self.id,
+        }
 
     def body(self) -> str:
         body: str = ""
@@ -115,21 +101,57 @@ class Article(SpipArticles):
         # Same with an "extra" section
         if len(self.extra) > 0:
             body += "\n\n# EXTRA\n\n" + self.extra
-        # If there is a caption, add the caption followed by a hr
-        if hasattr(self, "caption") and len(self.caption) > 0:
-            body += "\n\n" + self.caption + "\n\n***"
-        # PS
-        if hasattr(self, "ps") and len(self.ps) > 0:
-            body += "\n\n# POST-SCRIPTUM\n\n" + self.ps
-        # Microblog
-        if hasattr(self, "microblog") and len(self.microblog) > 0:
-            body += "\n\n# MICROBLOGGING\n\n" + self.microblog
         return body
 
     def content(self) -> str:
         # Return the final article text
         return "---\n" + self.frontmatter() + "---" + self.body()
 
+
+class Article(SpipObject, SpipArticles):
+    class Meta:
+        table_name: str = "spip_articles"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # More conversions needed for articles
+        self.surtitre: str = convert(self.surtitre, True)  # Probably unused
+        self.soustitre: str = convert(self.soustitre, True)  # Probably unused
+        self.chapo: str = convert(self.chapo)  # Probably unused
+        self.ps: str = convert(self.ps)  # Probably unused
+        self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false"
+        # ID
+        self.id = self.id_article
+
+    def frontmatter(self) -> str:
+        return dump(
+            {
+                **super().common_frontmatter(),
+                # Article specific
+                "summary": self.chapo,
+                "surtitle": self.surtitre,
+                "subtitle": self.soustitre,
+                "date": self.date_redac,
+                "authors": [author.nom for author in self.authors()],
+                # Debugging
+                "spip_id_rubrique": self.id_rubrique,
+            },
+            allow_unicode=True,
+        )
+
+    def body(self) -> str:
+        body: str = super().body()
+        # If there is a chapo (caption), add it followed by a horizontal rule
+        if len(self.chapo) > 0:
+            body += "\n\n" + self.chapo + "\n\n***"
+        # PS
+        if len(self.ps) > 0:
+            body += "\n\n# POST-SCRIPTUM\n\n" + self.ps
+        # Microblog
+        if len(self.microblog) > 0:
+            body += "\n\n# MICROBLOGGING\n\n" + self.microblog
+        return body
+
     def authors(self) -> list[SpipAuteurs]:
         return (
             SpipAuteurs.select()
@@ -151,11 +173,8 @@ def get_articles(section_id: int, limit: int = 10**6) -> ModelSelect:
     )
 
 
-ARTICLE_LINK = compile(r"\[(.*?)]\((?:art|article)([0-9]+)\)", I)
-
-
 def link_articles(text: str):
-    for match in ARTICLE_LINK.finditer(text):
+    for match in finditer(r"\[(.*?)]\((?:art|article)([0-9]+)\)", text, I):
         article = Article.get(Article.id_article == match.group(2))
         if len(match.group(1)) > 0:
             title: str = match.group(1)
@@ -167,75 +186,28 @@ def link_articles(text: str):
     return text
 
 
-class Rubrique(SpipRubriques):
+class Rubrique(SpipObject, SpipRubriques):
     class Meta:
         table_name: str = "spip_rubriques"
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.titre: str = convert(self.titre, True)
-        self.descriptif: str = convert(self.descriptif, True)
-        self.texte: str = convert(self.texte)  # Convert SPIP to Markdown
-        self.statut: str = "false" if self.statut == "publie" else "true"
-        self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true"
-        self.extra: str = convert(self.extra)  # Probably unused
-
-    def documents(self) -> ModelSelect:
-        documents = (
-            Document.select()
-            .join(
-                SpipDocumentsLiens,
-                on=(Document.id_document == SpipDocumentsLiens.id_document),
-            )
-            .where(SpipDocumentsLiens.id_objet == self.id_rubrique)
-        )
-        for d in documents:
-            self.texte = link_document(self.texte, d.id_document, d.titre, d.slug())
-        # Internal (articles) links
-        self.texte = link_articles(self.texte)
-        return documents
-
-    def slug(self, date: bool = False) -> str:
-        return slugify((self.date + "-" if date else "") + self.titre)
-
-    def filename(self) -> str:
-        return "_index" + "." + self.lang + "." + EXPORTTYPE
+        # ID
+        self.id = self.id_rubrique
+        # File prefix
+        self.prefix = "_index"
 
     def frontmatter(self) -> str:
         return dump(
             {
-                "lang": self.lang,
-                "translationKey": self.id_trad,
-                "title": self.titre,
-                "publishDate": self.date,
-                "lastmod": self.maj,
-                "draft": self.statut,
-                "description": self.descriptif,
+                **super().common_frontmatter(),
                 # Debugging
-                "spip_id": self.id_rubrique,
-                "spip_id_secteur": self.id_secteur,
+                "spip_id_parent": self.id_parent,
+                "spip_profondeur": self.profondeur,
             },
             allow_unicode=True,
         )
 
-    def body(self) -> str:
-        body: str = ""
-        # Add the title as a Markdown h1
-        if len(self.titre) > 0 and config.prepend_h1:
-            body += "\n\n# " + self.titre
-        # If there is a text, add the text preceded by two line breaks
-        if len(self.texte) > 0:
-            # Remove remaining HTML after & append to body
-            body += "\n\n" + self.texte
-        # Same with an "extra" section
-        if len(self.extra) > 0:
-            body += "\n\n# EXTRA\n\n" + self.extra
-        return body
-
-    def content(self) -> str:
-        # Return the final article text
-        return "---\n" + self.frontmatter() + "---" + self.body()
-
 
 # Query the DB to retrieve all sections sorted by publication date
 def get_sections(limit: int = 10**6) -> ModelSelect: