better article text build

2023-05-16 10:01:33 +02:00 · 2023-05-16 10:01:33 +02:00 · 12db0375e7
commit 12db0375e7
parent bc616cc7a1
2 changed files with 40 additions and 45 deletions
--- a/spip2md/converter.py
+++ b/spip2md/converter.py
@ -252,7 +252,10 @@ isoToUtf = (
 )
 ## WARNING unknown broken encoding
-unknownIso = (compile(r"\w*â€¨.*\r?\n"),)  # unknown â€¨ + surroundings
+unknownIso = (
    compile(r"\w*â€¨.*\r?\n"),  # unknown â€¨ + surroundings
    compile(r"\w*âˆ†.*\r?\n"),  # unknown â^† + surroundings
 )
 def convertBody(spipBody):
--- a/spip2md/iterator.py
+++ b/spip2md/iterator.py
@ -1,3 +1,5 @@
 from array import array
 from converter import convertBody, convertMeta
 from database import *
 from slugify import slugify
@ -38,7 +40,7 @@ class Article:
    def get_slug(self):
        """Return the result of slugifying the string "<id>-<title>"."""
        # The id is prepended to the title before slugification.
        raw_name = f"{self.id}-{self.title}"
        return slugify(raw_name)
-    
+
    def get_path(self):
        """Return the article's path, which is exactly its slug."""
        article_slug = self.get_slug()
        return article_slug
@ -46,55 +48,45 @@ class Article:
        return SpipAuteursLiens.select().where(SpipAuteursLiens.id_objet == self.id)
    def get_frontmatter(self):
-        return "---\n{}---".format(
+        return dump(
-            dump(
+            {
-                {
+                "lang": self.lang,
-                    "lang": self.lang,
+                "title": self.title,
-                    "title": self.title,
+                # "subtitle": self.subtitle,
-                    # "subtitle": self.subtitle,
+                "date": self.creationDate,
-                    "date": self.creationDate,
+                "publishDate": self.publicationDate,
-                    "publishDate": self.publicationDate,
+                "lastmod": self.update,
-                    "lastmod": self.update,
+                "draft": self.draft,
-                    "draft": self.draft,
+                "description": self.description,
-                    "description": self.description,
+                "authors": [author.id_auteur for author in self.get_authors()],
-                    "authors": [author.id_auteur for author in self.get_authors()],
+            },
-                },
+            allow_unicode=True,
                allow_unicode=True,
            )
        )
    # Contains things before the article like caption & titles
    def get_starting(self):
        """Return the text placed before the article body.

        If the caption is non-empty and is not a single space, the result is
        the caption followed by a blank line and a ``***`` line. Otherwise the
        result is the empty string.
        """
        caption = self.caption
        # Nothing to emit for an empty caption or a lone-space placeholder
        if len(caption) == 0 or caption == " ":
            return ""
        return "{}\n\n***\n".format(caption)
    # Contains things after the article like ps & extra
    def get_ending(self):
        """Return the sections that follow the article body.

        Each of ``self.extra``, ``self.ps`` and ``self.microblog`` that is
        neither ``None`` nor empty produces one section, made of a heading
        line, a blank line and the content. Sections appear in the order
        EXTRA, POST-SCRIPTUM, MICROBLOGGING and are separated by a blank line.

        Returns:
            The concatenated sections, or the empty string when there are none.
        """
        # The previous chained conditional expression parsed as
        # ``A if c1 else (B if c2 else (C if c3 else ""))``, so at most one
        # section was ever returned; build every present section instead.
        candidates = (
            ("# EXTRA", self.extra),
            ("# POST-SCRIPTUM", self.ps),
            ("# MICROBLOGGING", self.microblog),
        )
        # Truthiness skips both None and empty content
        return "\n\n".join(
            f"{heading}\n\n{content}" for heading, content in candidates if content
        )
    def get_article(self):
-        return "{}\n{}\n{}\n{}".format(
+        # Build the final article text
-            self.get_frontmatter(),
+        article: str = "---\n" + self.get_frontmatter() + "---"
-            self.get_starting(),
+        # If there is a caption, add the caption followed by a hr
-            self.text,
+        if len(self.caption) > 0:
-            self.get_ending(),
+            article += "\n\n" + self.caption + "\n\n***"
-        )
+        # If there is a text, add the text preceded by two line breaks
        if len(self.text) > 0:
            article += "\n\n" + self.text
        # Same with an "extra" section
        if self.extra != None and len(self.extra) > 0:
            article += "\n\n# EXTRA\n\n" + self.extra
        # PS
        if len(self.ps) > 0:
            article += "\n\n# POST-SCRIPTUM\n\n" + self.ps
        # Microblog
        if len(self.microblog) > 0:
            article += "\n\n# MICROBLOGGING\n\n" + self.microblog
        return article
 class Articles:
    exported: int = 0
    unknownChars: list = []
    def __init__(self, maxToExport) -> None:
        # Query the DB to retrieve all articles sorted by publication date
@ -111,7 +103,7 @@ class Articles:
    def __next__(self):
        if self.remaining() <= 0:
-            raise StopIteration
+            raise StopIteration()
        self.exported += 1
        return (
            {"exported": self.exported, "remaining": self.remaining()},