diff --git a/spip2md/articles.py b/spip2md/articles.py
index 672378d..e22fc22 100644
--- a/spip2md/articles.py
+++ b/spip2md/articles.py
@@ -13,7 +13,7 @@ class Article:
# self.surtitle = article.surtitre # Probably unused
self.title = convertMeta(article.titre)
self.subtitle = article.soustitre # Probably unused
- # self.section = article.id_rubrique # TODO join
+ self.section_id = article.id_rubrique
self.description = convertMeta(article.descriptif)
self.caption = article.chapo # Probably unused
self.text = convertBody(article.texte) # Markdown
@@ -38,14 +38,30 @@ class Article:
self.virtual = article.virtuel # TODO Why ?
self.microblog = article.microblog # Probably unused
- def getSlug(self):
- return slugify(f"{self.id}-{self.title}")
+ def getSection(self):  # Title (titre) of the SpipRubriques row whose id_rubrique equals self.section_id, passed through convertMeta
+ return convertMeta(
+ SpipRubriques.select()
+ .where(SpipRubriques.id_rubrique == self.section_id)[0]  # [0] keeps only the first matching row; NOTE(review): presumably raises IndexError when nothing matches - confirm
+ .titre
+ )
- def getPath(self):
- return self.getSlug()
+ def getPath(self) -> str:  # Relative output directory: "<slug of section title>/<slug of id-title>/"
+ return (
+ slugify(self.getSection()) + "/" + slugify(f"{self.id}-{self.title}") + "/"  # ends with "/" so a file name can be appended directly
+ )
+
+ def getFilename(self) -> str:  # Name of the file the article is written to inside its output directory
+ return "index.fr.md"  # same constant for every article
def getAuthors(self):
- return SpipAuteursLiens.select().where(SpipAuteursLiens.id_objet == self.id)
+ return (  # Query of SpipAuteurs rows linked to this article through SpipAuteursLiens
+ SpipAuteurs.select()
+ .join(
+ SpipAuteursLiens,
+ on=(SpipAuteurs.id_auteur == SpipAuteursLiens.id_auteur),  # explicit join condition on the shared id_auteur column
+ )
+ .where(SpipAuteursLiens.id_objet == self.id)  # NOTE(review): filters on id_objet only; if the link table also references other object kinds, a kind filter may be needed - confirm against the schema
+ )
def getFrontmatter(self):
return dump(
@@ -58,7 +74,7 @@ class Article:
"lastmod": self.update,
"draft": self.draft,
"description": self.description,
- "authors": [author.id_auteur for author in self.getAuthors()],
+ "authors": [author.nom for author in self.getAuthors()],
},
allow_unicode=True,
)
@@ -83,7 +99,7 @@ class Article:
article += "\n\n# MICROBLOGGING\n\n" + self.microblog
return article
- def getUnknownChars(self):
+ def getUnknownChars(self) -> list:
errors: list = []
for text in (self.title, self.text):
for char in unknownIso:
diff --git a/spip2md/converter.py b/spip2md/converter.py
index cec63b8..1784c52 100644
--- a/spip2md/converter.py
+++ b/spip2md/converter.py
@@ -1,7 +1,7 @@
from re import I, S, compile, finditer
# SPIP syntax to Markdown
-spipToMarkdown = (
+spipToMarkdown: tuple = (
( # horizontal rule
compile(r"- ?- ?- ?- ?[\- ]*|
", S | I),
# r"---",
@@ -113,7 +113,7 @@ spipToMarkdown = (
),
)
-spipToText = (
+spipToText: tuple = (
( # strong
compile(r"\{\{ *(.*?) *\}\}", S | I),
r"\1",
@@ -158,7 +158,7 @@ spipToText = (
),
)
-isoToUtf = (
+isoToUtf: tuple = (
# Broken encoding
( # Fix UTF-8 appostrophe that was interpreted as ISO 8859-1
"’",
@@ -252,14 +252,13 @@ isoToUtf = (
)
## WARNING unknown broken encoding
-unknownIso = (
- r"
", # unknown 
 + surroundings
- r"∆", # unknown â^† + surroundings
+unknownIso: tuple = (
+ r"
", # unknown 

+ r"∆", # unknown â^†
)
-def convertBody(spipBody):
- text: str = spipBody
+def convertBody(text: str) -> str:
for spip, markdown in spipToMarkdown:
text = spip.sub(markdown, text)
for iso, utf in isoToUtf:
@@ -267,18 +266,22 @@ def convertBody(spipBody):
return text
-def convertMeta(spipMeta):
-    text: str = spipMeta
+def convertMeta(text: str) -> str:  # Strip SPIP markup from a metadata string (spipToText), then apply the isoToUtf encoding fixes
     for spip, metadata in spipToText:
         text = spip.sub(metadata, text)
     for iso, utf in isoToUtf:
-        text.replace(iso, utf)
+        text = text.replace(iso, utf)  # str.replace returns a new string; rebind it, otherwise the encoding fix is silently dropped
     return text
-def highlightUnknownChars(text):
+def removeUnknownChars(text: str) -> str:  # Return text with every occurrence of each unknownIso entry deleted
+    for char in unknownIso:
+        text = text.replace(char, "")  # str.replace returns a new string; rebind it, otherwise the function returns its input unchanged
+    return text
+
+def highlightUnknownChars(text: str) -> str:
# Define terminal escape sequences to stylize output, regex escaped
- COLOR = "\033[91m" + "\033[1m" # Red + Bold
- RESET = "\033[0m"
+ COLOR: str = "\033[91m" + "\033[1m" # Red + Bold
+ RESET: str = "\033[0m"
# Highlight in COLOR unknown chars in text
for char in unknownIso:
for match in finditer(char, text):
diff --git a/spip2md/main.py b/spip2md/main.py
index cc108af..cdf09de 100755
--- a/spip2md/main.py
+++ b/spip2md/main.py
@@ -1,14 +1,14 @@
#!python
+from articles import Article, Articles
from config import config
-from database import db
-from articles import Articles
from converter import highlightUnknownChars
+from database import db
if __name__ != "__main__":
exit()
import sys
-from os import mkdir
+from os import makedirs, mkdir
from shutil import rmtree
# Clean the output dir & create a new
@@ -32,7 +32,8 @@ B: str = "\033[94m"
BOLD: str = "\033[1m"
RESET: str = "\033[0m"
-unknownChars: dict = {}
+# Articles that contain unknown chars
+unknownCharsArticles: list[Article] = []
# Loop among first maxToExport articles & export them
for counter, article in Articles(maxToExport):
@@ -44,24 +45,26 @@ for counter, article in Articles(maxToExport):
print(
f"{BOLD}{counter['exported']}.{RESET} " + highlightUnknownChars(article.title)
)
- fullPath = config.outputDir + "/" + article.getPath()
- print(f"{BOLD}>{RESET} {fullPath}/index.md")
- mkdir(fullPath)
- with open(fullPath + "/index.md", "w") as f:
+ fullPath: str = config.outputDir + "/" + article.getPath()
+ print(f"{BOLD}>{RESET} {fullPath}{article.getFilename()}")
+ makedirs(fullPath, exist_ok=True)
+ with open(fullPath + article.getFilename(), "w") as f:
f.write(article.getArticle())
# Store detected unknown characters
if len(article.getUnknownChars()) > 0:
- unknownChars[article.title] = article.getUnknownChars()
+ unknownCharsArticles.append(article)
-for title in unknownChars:
- nb = len(unknownChars[title])
+for article in unknownCharsArticles:
+ unknownCharsApparitions: list = article.getUnknownChars()
+ nb: int = len(unknownCharsApparitions)
+ s: str = "s" if nb > 1 else ""
print(
- f"\n{BOLD}{nb} "
- + f"unknown character{'s' if nb > 1 else ''} detected in{RESET} " +
- highlightUnknownChars(title)
+ f"\n{BOLD}{nb}{RESET} unknown character{s} "
+ + f"detected in article {BOLD}{article.id}{RESET}"
+ + f"\n{BOLD}·{RESET} "
+ + highlightUnknownChars(article.title)
)
- for text in unknownChars[title]:
+ for text in unknownCharsApparitions:
print(f" {BOLD}…{RESET} " + highlightUnknownChars(text))
-# Close the database connection
-db.close()
+db.close() # Close the database connection