diff --git a/spip2md/articles.py b/spip2md/articles.py index 672378d..e22fc22 100644 --- a/spip2md/articles.py +++ b/spip2md/articles.py @@ -13,7 +13,7 @@ class Article: # self.surtitle = article.surtitre # Probably unused self.title = convertMeta(article.titre) self.subtitle = article.soustitre # Probably unused - # self.section = article.id_rubrique # TODO join + self.section_id = article.id_rubrique self.description = convertMeta(article.descriptif) self.caption = article.chapo # Probably unused self.text = convertBody(article.texte) # Markdown @@ -38,14 +38,30 @@ class Article: self.virtual = article.virtuel # TODO Why ? self.microblog = article.microblog # Probably unused - def getSlug(self): - return slugify(f"{self.id}-{self.title}") + def getSection(self): + return convertMeta( + SpipRubriques.select() + .where(SpipRubriques.id_rubrique == self.section_id)[0] + .titre + ) - def getPath(self): - return self.getSlug() + def getPath(self) -> str: + return ( + slugify(self.getSection()) + "/" + slugify(f"{self.id}-{self.title}") + "/" + ) + + def getFilename(self) -> str: + return "index.fr.md" def getAuthors(self): - return SpipAuteursLiens.select().where(SpipAuteursLiens.id_objet == self.id) + return ( + SpipAuteurs.select() + .join( + SpipAuteursLiens, + on=(SpipAuteurs.id_auteur == SpipAuteursLiens.id_auteur), + ) + .where(SpipAuteursLiens.id_objet == self.id) + ) def getFrontmatter(self): return dump( @@ -58,7 +74,7 @@ class Article: "lastmod": self.update, "draft": self.draft, "description": self.description, - "authors": [author.id_auteur for author in self.getAuthors()], + "authors": [author.nom for author in self.getAuthors()], }, allow_unicode=True, ) @@ -83,7 +99,7 @@ class Article: article += "\n\n# MICROBLOGGING\n\n" + self.microblog return article - def getUnknownChars(self): + def getUnknownChars(self) -> list: errors: list = [] for text in (self.title, self.text): for char in unknownIso: diff --git a/spip2md/converter.py b/spip2md/converter.py index cec63b8..1784c52 100644 --- a/spip2md/converter.py +++ b/spip2md/converter.py @@ -1,7 +1,7 @@ from re import I, S, compile, finditer # SPIP syntax to Markdown -spipToMarkdown = ( +spipToMarkdown: tuple = ( ( # horizontal rule compile(r"- ?- ?- ?- ?[\- ]*|
", S | I), # r"---", @@ -113,7 +113,7 @@ spipToMarkdown = ( ), ) -spipToText = ( +spipToText: tuple = ( ( # strong compile(r"\{\{ *(.*?) *\}\}", S | I), r"\1", @@ -158,7 +158,7 @@ spipToText = ( ), ) -isoToUtf = ( +isoToUtf: tuple = ( # Broken encoding ( # Fix UTF-8 appostrophe that was interpreted as ISO 8859-1 "’", @@ -252,14 +252,13 @@ isoToUtf = ( ) ## WARNING unknown broken encoding -unknownIso = ( - r"
", # unknown 
 + surroundings - r"∆", # unknown â^† + surroundings +unknownIso: tuple = ( + r"
", # unknown 
 + r"∆", # unknown â^† ) -def convertBody(spipBody): - text: str = spipBody +def convertBody(text: str) -> str: for spip, markdown in spipToMarkdown: text = spip.sub(markdown, text) for iso, utf in isoToUtf: @@ -267,18 +266,22 @@ def convertBody(spipBody): return text -def convertMeta(spipMeta): - text: str = spipMeta +def convertMeta(text: str) -> str: for spip, metadata in spipToText: text = spip.sub(metadata, text) for iso, utf in isoToUtf: text.replace(iso, utf) return text -def highlightUnknownChars(text): +def removeUnknownChars(text: str) -> str: + for char in unknownIso: + text.replace(char, "") + return text + +def highlightUnknownChars(text: str) -> str: # Define terminal escape sequences to stylize output, regex escaped - COLOR = "\033[91m" + "\033[1m" # Red + Bold - RESET = "\033[0m" + COLOR: str = "\033[91m" + "\033[1m" # Red + Bold + RESET: str = "\033[0m" # Highlight in COLOR unknown chars in text for char in unknownIso: for match in finditer(char, text): diff --git a/spip2md/main.py b/spip2md/main.py index cc108af..cdf09de 100755 --- a/spip2md/main.py +++ b/spip2md/main.py @@ -1,14 +1,14 @@ #!python +from articles import Article, Articles from config import config -from database import db -from articles import Articles from converter import highlightUnknownChars +from database import db if __name__ != "__main__": exit() import sys -from os import mkdir +from os import makedirs, mkdir from shutil import rmtree # Clean the output dir & create a new @@ -32,7 +32,8 @@ B: str = "\033[94m" BOLD: str = "\033[1m" RESET: str = "\033[0m" -unknownChars: dict = {} +# Articles that contains unknown chars +unknownCharsArticles: list[Article] = [] # Loop among first maxToExport articles & export them for counter, article in Articles(maxToExport): @@ -44,24 +45,26 @@ for counter, article in Articles(maxToExport): print( f"{BOLD}{counter['exported']}.{RESET} " + highlightUnknownChars(article.title) ) - fullPath = config.outputDir + "/" + article.getPath() - print(f"{BOLD}>{RESET} {fullPath}/index.md") - mkdir(fullPath) - with open(fullPath + "/index.md", "w") as f: + fullPath: str = config.outputDir + "/" + article.getPath() + print(f"{BOLD}>{RESET} {fullPath}{article.getFilename()}") + makedirs(fullPath, exist_ok=True) + with open(fullPath + article.getFilename(), "w") as f: f.write(article.getArticle()) # Store detected unknown characters if len(article.getUnknownChars()) > 0: - unknownChars[article.title] = article.getUnknownChars() + unknownCharsArticles.append(article) -for title in unknownChars: - nb = len(unknownChars[title]) +for article in unknownCharsArticles: + unknownCharsApparitions: list = article.getUnknownChars() + nb: int = len(unknownCharsApparitions) + s: str = "s" if nb > 1 else "" print( - f"\n{BOLD}{nb} " - + f"unknown character{'s' if nb > 1 else ''} detected in{RESET} " + - highlightUnknownChars(title) + f"\n{BOLD}{nb}{RESET} unknown character{s} " + + f"detected in article {BOLD}{article.id}{RESET}" + + f"\n{BOLD}·{RESET} " + + highlightUnknownChars(article.title) ) - for text in unknownChars[title]: + for text in unknownCharsApparitions: print(f" {BOLD}…{RESET} " + highlightUnknownChars(text)) -# Close the database connection -db.close() +db.close() # Close the database connection