From 12db0375e7f065cacee85f0c9001074fb952f2f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guilhem=20Faur=C3=A9?= <pro@gfaure.eu>
Date: Tue, 16 May 2023 10:01:33 +0200
Subject: [PATCH] better article text build

---
 spip2md/converter.py |  5 ++-
 spip2md/iterator.py  | 80 ++++++++++++++++++++------------------------
 2 files changed, 40 insertions(+), 45 deletions(-)

diff --git a/spip2md/converter.py b/spip2md/converter.py
index 30e1513..9da8723 100644
--- a/spip2md/converter.py
+++ b/spip2md/converter.py
@@ -252,7 +252,10 @@ isoToUtf = (
 )
 
 ## WARNING unknown broken encoding
-unknownIso = (compile(r"\w*â€¨.*\r?\n"),)  # unknown â€¨ + surroundings
+unknownIso = (
+    compile(r"\w*â€¨.*\r?\n"),  # unknown â€¨ + surroundings
+    compile(r"\w*âˆ†.*\r?\n"),  # unknown âˆ† + surroundings
+)
 
 
 def convertBody(spipBody):
diff --git a/spip2md/iterator.py b/spip2md/iterator.py
index a359a6d..3ac9990 100644
--- a/spip2md/iterator.py
+++ b/spip2md/iterator.py
@@ -1,3 +1,5 @@
+from array import array
+
 from converter import convertBody, convertMeta
 from database import *
 from slugify import slugify
@@ -38,7 +40,7 @@ class Article:
 
     def get_slug(self):
         return slugify(f"{self.id}-{self.title}")
-    
+
     def get_path(self):
         return self.get_slug()
 
@@ -46,55 +48,45 @@ class Article:
         return SpipAuteursLiens.select().where(SpipAuteursLiens.id_objet == self.id)
 
     def get_frontmatter(self):
-        return "---\n{}---".format(
-            dump(
-                {
-                    "lang": self.lang,
-                    "title": self.title,
-                    # "subtitle": self.subtitle,
-                    "date": self.creationDate,
-                    "publishDate": self.publicationDate,
-                    "lastmod": self.update,
-                    "draft": self.draft,
-                    "description": self.description,
-                    "authors": [author.id_auteur for author in self.get_authors()],
-                },
-                allow_unicode=True,
-            )
-        )
-
-    # Contains things before the article like caption & titles
-    def get_starting(self):
-        return (
-            # f"{self.caption}\n" if len(self.caption) > 0 else "" + f"# {self.title}\n"
-            f"{self.caption}\n\n***\n"
-            if len(self.caption) > 0 and self.caption != " "
-            else ""
-        )
-
-    # Contains things after the article like ps & extra
-    def get_ending(self):
-        return (
-            f"# EXTRA\n\n{self.extra}"
-            if self.extra != None and len(self.extra) > 0
-            else "" + f"# POST-SCRIPTUM\n\n{self.ps}"
-            if len(self.ps) > 0
-            else "" + f"# MICROBLOGGING\n\n{self.microblog}"
-            if len(self.microblog) > 0
-            else ""
+        return dump(
+            {
+                "lang": self.lang,
+                "title": self.title,
+                # "subtitle": self.subtitle,
+                "date": self.creationDate,
+                "publishDate": self.publicationDate,
+                "lastmod": self.update,
+                "draft": self.draft,
+                "description": self.description,
+                "authors": [author.id_auteur for author in self.get_authors()],
+            },
+            allow_unicode=True,
         )
 
     def get_article(self):
-        return "{}\n{}\n{}\n{}".format(
-            self.get_frontmatter(),
-            self.get_starting(),
-            self.text,
-            self.get_ending(),
-        )
+        # Build the final article text
+        article: str = "---\n" + self.get_frontmatter() + "---"
+        # If there is a caption, add the caption followed by a horizontal rule
+        if len(self.caption) > 0:
+            article += "\n\n" + self.caption + "\n\n***"
+        # If there is a text, add the text preceded by two line breaks
+        if len(self.text) > 0:
+            article += "\n\n" + self.text
+        # Same with an "extra" section
+        if self.extra != None and len(self.extra) > 0:
+            article += "\n\n# EXTRA\n\n" + self.extra
+        # PS
+        if len(self.ps) > 0:
+            article += "\n\n# POST-SCRIPTUM\n\n" + self.ps
+        # Microblog
+        if len(self.microblog) > 0:
+            article += "\n\n# MICROBLOGGING\n\n" + self.microblog
+        return article
 
 
 class Articles:
     exported: int = 0
+    unknownChars: list = []
 
     def __init__(self, maxToExport) -> None:
         # Query the DB to retrieve all articles sorted by publication date
@@ -111,7 +103,7 @@ class Articles:
 
     def __next__(self):
         if self.remaining() <= 0:
-            raise StopIteration
+            raise StopIteration()
         self.exported += 1
         return (
             {"exported": self.exported, "remaining": self.remaining()},