sections directories, better unknown char messages

2023-05-16 13:29:59 +02:00 · 2023-05-16 13:29:59 +02:00 · f23073ef12
commit f23073ef12
parent caf9db541f
3 changed files with 60 additions and 38 deletions
--- a/spip2md/articles.py
+++ b/spip2md/articles.py
@ -13,7 +13,7 @@ class Article:
        # self.surtitle = article.surtitre  # Probably unused
        self.title = convertMeta(article.titre)
        self.subtitle = article.soustitre  # Probably unused
-        # self.section = article.id_rubrique # TODO join
+        self.section_id = article.id_rubrique
        self.description = convertMeta(article.descriptif)
        self.caption = article.chapo  # Probably unused
        self.text = convertBody(article.texte)  # Markdown
@ -38,14 +38,30 @@ class Article:
        self.virtual = article.virtuel  # TODO Why ?
        self.microblog = article.microblog  # Probably unused
-    def getSlug(self):
+    def getSection(self):
-        return slugify(f"{self.id}-{self.title}")
+        return convertMeta(
            SpipRubriques.select()
            .where(SpipRubriques.id_rubrique == self.section_id)[0]
            .titre
        )
-    def getPath(self):
+    def getPath(self) -> str:
-        return self.getSlug()
+        return (
            slugify(self.getSection()) + "/" + slugify(f"{self.id}-{self.title}") + "/"
        )
    def getFilename(self) -> str:
        return "index.fr.md"
    def getAuthors(self):
-        return SpipAuteursLiens.select().where(SpipAuteursLiens.id_objet == self.id)
+        return (
            SpipAuteurs.select()
            .join(
                SpipAuteursLiens,
                on=(SpipAuteurs.id_auteur == SpipAuteursLiens.id_auteur),
            )
            .where(SpipAuteursLiens.id_objet == self.id)
        )
    def getFrontmatter(self):
        return dump(
@ -58,7 +74,7 @@ class Article:
                "lastmod": self.update,
                "draft": self.draft,
                "description": self.description,
-                "authors": [author.id_auteur for author in self.getAuthors()],
+                "authors": [author.nom for author in self.getAuthors()],
            },
            allow_unicode=True,
        )
@ -83,7 +99,7 @@ class Article:
            article += "\n\n# MICROBLOGGING\n\n" + self.microblog
        return article
-    def getUnknownChars(self):
+    def getUnknownChars(self) -> list:
        errors: list = []
        for text in (self.title, self.text):
            for char in unknownIso:
--- a/spip2md/converter.py
+++ b/spip2md/converter.py
@ -1,7 +1,7 @@
 from re import I, S, compile, finditer
 # SPIP syntax to Markdown
-spipToMarkdown = (
+spipToMarkdown: tuple = (
    (  # horizontal rule
        compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", S | I),
        # r"---",
@ -113,7 +113,7 @@ spipToMarkdown = (
    ),
 )
-spipToText = (
+spipToText: tuple = (
    (  # strong
        compile(r"\{\{ *(.*?) *\}\}", S | I),
        r"\1",
@ -158,7 +158,7 @@ spipToText = (
    ),
 )
-isoToUtf = (
+isoToUtf: tuple = (
    # Broken encoding
    (  # Fix UTF-8 apostrophe that was interpreted as ISO 8859-1
        "â€™",
@ -252,14 +252,13 @@ isoToUtf = (
 )
 ## WARNING unknown broken encoding
-unknownIso = (
+unknownIso: tuple = (
-    r"â€¨",  # unknown â€¨ + surroundings
+    r"â€¨",  # unknown â€¨
-    r"âˆ†",  # unknown â^† + surroundings
+    r"âˆ†",  # unknown â^†
 )
-def convertBody(spipBody):
+def convertBody(text: str) -> str:
    text: str = spipBody
    for spip, markdown in spipToMarkdown:
        text = spip.sub(markdown, text)
    for iso, utf in isoToUtf:
@ -267,18 +266,22 @@ def convertBody(spipBody):
    return text
-def convertMeta(spipMeta):
+def convertMeta(text: str) -> str:
    text: str = spipMeta
    for spip, metadata in spipToText:
        text = spip.sub(metadata, text)
    for iso, utf in isoToUtf:
        text.replace(iso, utf)
    return text
-def highlightUnknownChars(text):
+def removeUnknownChars(text: str) -> str:
    for char in unknownIso:
        text.replace(char, "")
    return text
 def highlightUnknownChars(text: str) -> str:
    # Define terminal escape sequences to stylize output, regex escaped
-    COLOR = "\033[91m" + "\033[1m"  # Red + Bold
+    COLOR: str = "\033[91m" + "\033[1m"  # Red + Bold
-    RESET = "\033[0m"
+    RESET: str = "\033[0m"
    # Highlight in COLOR unknown chars in text
    for char in unknownIso:
        for match in finditer(char, text):
--- a/spip2md/main.py
+++ b/spip2md/main.py
@ -1,14 +1,14 @@
 #!python
 from articles import Article, Articles
 from config import config
 from database import db
 from articles import Articles
 from converter import highlightUnknownChars
 from database import db
 if __name__ != "__main__":
    exit()
 import sys
-from os import mkdir
+from os import makedirs, mkdir
 from shutil import rmtree
 # Clean the output dir & create a new one
@ -32,7 +32,8 @@ B: str = "\033[94m"
 BOLD: str = "\033[1m"
 RESET: str = "\033[0m"
-unknownChars: dict = {}
+# Articles that contain unknown chars
 unknownCharsArticles: list[Article] = []
 # Loop among first maxToExport articles & export them
 for counter, article in Articles(maxToExport):
@ -44,24 +45,26 @@ for counter, article in Articles(maxToExport):
    print(
        f"{BOLD}{counter['exported']}.{RESET} " + highlightUnknownChars(article.title)
    )
-    fullPath = config.outputDir + "/" + article.getPath()
+    fullPath: str = config.outputDir + "/" + article.getPath()
-    print(f"{BOLD}>{RESET} {fullPath}/index.md")
+    print(f"{BOLD}>{RESET} {fullPath}{article.getFilename()}")
-    mkdir(fullPath)
+    makedirs(fullPath, exist_ok=True)
-    with open(fullPath + "/index.md", "w") as f:
+    with open(fullPath + article.getFilename(), "w") as f:
        f.write(article.getArticle())
    # Store detected unknown characters
    if len(article.getUnknownChars()) > 0:
-        unknownChars[article.title] = article.getUnknownChars()
+        unknownCharsArticles.append(article)
-for title in unknownChars:
+for article in unknownCharsArticles:
-    nb = len(unknownChars[title])
+    unknownCharsApparitions: list = article.getUnknownChars()
    nb: int = len(unknownCharsApparitions)
    s: str = "s" if nb > 1 else ""
    print(
-        f"\n{BOLD}{nb} "
+        f"\n{BOLD}{nb}{RESET} unknown character{s} "
-        + f"unknown character{'s' if nb > 1 else ''} detected in{RESET} " +
+        + f"detected in article {BOLD}{article.id}{RESET}"
-            highlightUnknownChars(title)
+        + f"\n{BOLD}·{RESET} "
        + highlightUnknownChars(article.title)
    )
-    for text in unknownChars[title]:
+    for text in unknownCharsApparitions:
        print(f"  {BOLD}…{RESET} " + highlightUnknownChars(text))
-# Close the database connection
+db.close()  # Close the database connection
 db.close()