sections directories, better unknown char messages

2023-05-16 13:29:59 +02:00 · 2023-05-16 13:29:59 +02:00 · f23073ef12
commit f23073ef12
parent caf9db541f
3 changed files with 60 additions and 38 deletions
--- a/spip2md/articles.py
+++ b/spip2md/articles.py
@ -13,7 +13,7 @@ class Article:
        # self.surtitle = article.surtitre  # Probably unused
        self.title = convertMeta(article.titre)
        self.subtitle = article.soustitre  # Probably unused
-        # self.section = article.id_rubrique # TODO join
+        self.section_id = article.id_rubrique
        self.description = convertMeta(article.descriptif)
        self.caption = article.chapo  # Probably unused
        self.text = convertBody(article.texte)  # Markdown
@ -38,14 +38,30 @@ class Article:
        self.virtual = article.virtuel  # TODO Why ?
        self.microblog = article.microblog  # Probably unused

-    def getSlug(self):
-        return slugify(f"{self.id}-{self.title}")
+    def getSection(self):
+        return convertMeta(
+            SpipRubriques.select()
+            .where(SpipRubriques.id_rubrique == self.section_id)[0]
+            .titre
+        )

-    def getPath(self):
-        return self.getSlug()
+    def getPath(self) -> str:
+        return (
+            slugify(self.getSection()) + "/" + slugify(f"{self.id}-{self.title}") + "/"
+        )
+
+    def getFilename(self) -> str:
+        return "index.fr.md"

    def getAuthors(self):
-        return SpipAuteursLiens.select().where(SpipAuteursLiens.id_objet == self.id)
+        return (
+            SpipAuteurs.select()
+            .join(
+                SpipAuteursLiens,
+                on=(SpipAuteurs.id_auteur == SpipAuteursLiens.id_auteur),
+            )
+            .where(SpipAuteursLiens.id_objet == self.id)
+        )

    def getFrontmatter(self):
        return dump(
@ -58,7 +74,7 @@ class Article:
                "lastmod": self.update,
                "draft": self.draft,
                "description": self.description,
-                "authors": [author.id_auteur for author in self.getAuthors()],
+                "authors": [author.nom for author in self.getAuthors()],
            },
            allow_unicode=True,
        )
@ -83,7 +99,7 @@ class Article:
            article += "\n\n# MICROBLOGGING\n\n" + self.microblog
        return article

-    def getUnknownChars(self):
+    def getUnknownChars(self) -> list:
        errors: list = []
        for text in (self.title, self.text):
            for char in unknownIso:
--- a/spip2md/converter.py
+++ b/spip2md/converter.py
@ -1,7 +1,7 @@
 from re import I, S, compile, finditer

 # SPIP syntax to Markdown
-spipToMarkdown = (
+spipToMarkdown: tuple = (
    (  # horizontal rule
        compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", S | I),
        # r"---",
@ -113,7 +113,7 @@ spipToMarkdown = (
    ),
 )

-spipToText = (
+spipToText: tuple = (
    (  # strong
        compile(r"\{\{ *(.*?) *\}\}", S | I),
        r"\1",
@ -158,7 +158,7 @@ spipToText = (
    ),
 )

-isoToUtf = (
+isoToUtf: tuple = (
    # Broken encoding
    (  # Fix UTF-8 appostrophe that was interpreted as ISO 8859-1
        "â€™",
@ -252,14 +252,13 @@ isoToUtf = (
 )

 ## WARNING unknown broken encoding
-unknownIso = (
-    r"â€¨",  # unknown â€¨ + surroundings
-    r"âˆ†",  # unknown â^† + surroundings
+unknownIso: tuple = (
+    r"â€¨",  # unknown â€¨
+    r"âˆ†",  # unknown â^†
 )


-def convertBody(spipBody):
-    text: str = spipBody
+def convertBody(text: str) -> str:
    for spip, markdown in spipToMarkdown:
        text = spip.sub(markdown, text)
    for iso, utf in isoToUtf:
@ -267,18 +266,22 @@ def convertBody(spipBody):
    return text


-def convertMeta(spipMeta):
-    text: str = spipMeta
+def convertMeta(text: str) -> str:
    for spip, metadata in spipToText:
        text = spip.sub(metadata, text)
    for iso, utf in isoToUtf:
        text.replace(iso, utf)
    return text

-def highlightUnknownChars(text):
+def removeUnknownChars(text: str) -> str:  # Return text with every unknownIso sequence stripped
+    for char in unknownIso:
+        text = text.replace(char, "")  # str is immutable: keep the copy that replace() returns
+    return text
+
+def highlightUnknownChars(text: str) -> str:
    # Define terminal escape sequences to stylize output, regex escaped
-    COLOR = "\033[91m" + "\033[1m"  # Red + Bold
-    RESET = "\033[0m"
+    COLOR: str = "\033[91m" + "\033[1m"  # Red + Bold
+    RESET: str = "\033[0m"
    # Highlight in COLOR unknown chars in text
    for char in unknownIso:
        for match in finditer(char, text):
--- a/spip2md/main.py
+++ b/spip2md/main.py
@ -1,14 +1,14 @@
 #!python
+from articles import Article, Articles
 from config import config
-from database import db
-from articles import Articles
 from converter import highlightUnknownChars
+from database import db

 if __name__ != "__main__":
    exit()

 import sys
-from os import mkdir
+from os import makedirs, mkdir
 from shutil import rmtree

 # Clean the output dir & create a new
@ -32,7 +32,8 @@ B: str = "\033[94m"
 BOLD: str = "\033[1m"
 RESET: str = "\033[0m"

-unknownChars: dict = {}
+# Articles that contain unknown chars
+unknownCharsArticles: list[Article] = []

 # Loop among first maxToExport articles & export them
 for counter, article in Articles(maxToExport):
@ -44,24 +45,26 @@ for counter, article in Articles(maxToExport):
    print(
        f"{BOLD}{counter['exported']}.{RESET} " + highlightUnknownChars(article.title)
    )
-    fullPath = config.outputDir + "/" + article.getPath()
-    print(f"{BOLD}>{RESET} {fullPath}/index.md")
-    mkdir(fullPath)
-    with open(fullPath + "/index.md", "w") as f:
+    fullPath: str = config.outputDir + "/" + article.getPath()
+    print(f"{BOLD}>{RESET} {fullPath}{article.getFilename()}")
+    makedirs(fullPath, exist_ok=True)
+    with open(fullPath + article.getFilename(), "w") as f:
        f.write(article.getArticle())
    # Store detected unknown characters
    if len(article.getUnknownChars()) > 0:
-        unknownChars[article.title] = article.getUnknownChars()
+        unknownCharsArticles.append(article)

-for title in unknownChars:
-    nb = len(unknownChars[title])
+for article in unknownCharsArticles:
+    unknownCharsApparitions: list = article.getUnknownChars()
+    nb: int = len(unknownCharsApparitions)
+    s: str = "s" if nb > 1 else ""
    print(
-        f"\n{BOLD}{nb} "
-        + f"unknown character{'s' if nb > 1 else ''} detected in{RESET} " +
-            highlightUnknownChars(title)
+        f"\n{BOLD}{nb}{RESET} unknown character{s} "
+        + f"detected in article {BOLD}{article.id}{RESET}"
+        + f"\n{BOLD}·{RESET} "
+        + highlightUnknownChars(article.title)
    )
-    for text in unknownChars[title]:
+    for text in unknownCharsApparitions:
        print(f"  {BOLD}…{RESET} " + highlightUnknownChars(text))

-# Close the database connection
-db.close()
+db.close()  # Close the database connection