From e1c8bd4b2eae2e9d26b4a37d6059bce569becea6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilhem=20Faur=C3=A9?= Date: Tue, 16 May 2023 15:55:51 +0200 Subject: [PATCH] PEP8 refactor --- spip2md/articles.py | 44 +++++++++++++++++++++++--------------------- spip2md/config.py | 36 ++++++++++++++++++------------------ spip2md/converter.py | 39 +++++++++++++++++++++------------------ spip2md/database.py | 17 ++++++++++++++--- spip2md/main.py | 44 ++++++++++++++++++++++---------------------- 5 files changed, 98 insertions(+), 82 deletions(-) diff --git a/spip2md/articles.py b/spip2md/articles.py index 8de9038..6867c14 100644 --- a/spip2md/articles.py +++ b/spip2md/articles.py @@ -1,23 +1,25 @@ # pyright: basic from re import finditer -from converter import convertBody, convertMeta, unknownIso -from database import * from slugify import slugify -# from yaml import CDumper as Dumper from yaml import dump +from converter import convert_body, convert_meta, unknown_iso +from database import SpipArticles, SpipAuteurs, SpipAuteursLiens, SpipRubriques + +# from yaml import CDumper as Dumper + class Article: def __init__(self, article): self.id: int = article.id_article # self.surtitle = article.surtitre # Probably unused - self.title: str = convertMeta(article.titre) + self.title: str = convert_meta(article.titre) self.subtitle: str = article.soustitre # Probably unused self.section_id: int = article.id_rubrique - self.description: str = convertMeta(article.descriptif) + self.description: str = convert_meta(article.descriptif) self.caption: str = article.chapo # Probably unused - self.text: str = convertBody(article.texte) # Markdown + self.text: str = convert_body(article.texte) # Markdown self.ps: str = article.ps # Probably unused self.publicationDate: str = article.date self.draft: bool = False if article.statut == "publie" else True @@ -39,22 +41,22 @@ class Article: self.virtual: str = article.virtuel # TODO Why ? self.microblog: str = article.microblog # Probably unused - def getSection(self) -> str: - return convertMeta( + def get_section(self) -> str: + return convert_meta( SpipRubriques.select() .where(SpipRubriques.id_rubrique == self.section_id)[0] .titre ) - def getPath(self) -> str: + def get_path(self) -> str: return ( - slugify(self.getSection()) + "/" + slugify(f"{self.id}-{self.title}") + "/" + slugify(self.get_section()) + "/" + slugify(f"{self.id}-{self.title}") + "/" ) - def getFilename(self) -> str: + def get_filename(self) -> str: return "index.fr.md" - def getAuthors(self) -> tuple: + def get_authors(self) -> tuple: return ( SpipAuteurs.select() .join( @@ -64,7 +66,7 @@ class Article: .where(SpipAuteursLiens.id_objet == self.id) ) - def getFrontmatter(self) -> str: + def get_frontmatter(self) -> str: return dump( { "lang": self.lang, @@ -75,14 +77,14 @@ class Article: "lastmod": self.update, "draft": self.draft, "description": self.description, - "authors": [author.nom for author in self.getAuthors()], + "authors": [author.nom for author in self.get_authors()], }, allow_unicode=True, ) - def getArticle(self) -> str: + def get_article(self) -> str: # Build the final article text - article: str = "---\n" + self.getFrontmatter() + "---" + article: str = "---\n" + self.get_frontmatter() + "---" # If there is a caption, add the caption followed by a hr if len(self.caption) > 0: article += "\n\n" + self.caption + "\n\n***" @@ -90,7 +92,7 @@ class Article: if len(self.text) > 0: article += "\n\n" + self.text # Same with an "extra" section - if self.extra != None and len(self.extra) > 0: + if self.extra is not None and len(self.extra) > 0: article += "\n\n# EXTRA\n\n" + self.extra # PS if len(self.ps) > 0: @@ -100,10 +102,10 @@ class Article: article += "\n\n# MICROBLOGGING\n\n" + self.microblog return article - def getUnknownChars(self) -> list[str]: + def get_unknown_chars(self) -> list[str]: errors: list[str] = [] for text in (self.title, self.text): - for char in unknownIso: + for char in unknown_iso: for match in finditer(char + r".*(?=\r?\n|$)", text): errors.append(match.group()) return errors @@ -112,10 +114,10 @@ class Article: class Articles: exported: int = 0 - def __init__(self, maxToExport: int) -> None: + def __init__(self, maxexport: int) -> None: # Query the DB to retrieve all articles sorted by publication date self.articles = ( - SpipArticles.select().order_by(SpipArticles.date.desc()).limit(maxToExport) + SpipArticles.select().order_by(SpipArticles.date.desc()).limit(maxexport) ) self.toExport: int = len(self.articles) diff --git a/spip2md/config.py b/spip2md/config.py index f60e2ef..ae9b602 100644 --- a/spip2md/config.py +++ b/spip2md/config.py @@ -4,36 +4,36 @@ from os.path import isfile from yaml import CLoader as Loader from yaml import load -configPaths = ("spip2md.yml", "spip2md.yaml") +config_paths = ("spip2md.yml", "spip2md.yaml") class Configuration: db = "spip" - dbHost = "localhost" - dbUser = "spip" - dbPass = "password" - outputDir = "output" - defaultNbToExport = 1000 + db_host = "localhost" + db_user = "spip" + db_pass = "password" + output_dir = "output" + default_export_nb = 1000 - def __init__(self, configFile: str | None = None) -> None: - if configFile != None: - with open(configFile) as f: + def __init__(self, config_file: str | None = None) -> None: + if config_file is not None: + with open(config_file) as f: config = load(f.read(), Loader=Loader) if "db" in config: self.db = config["db"] - if "dbUser" in config: - self.dbUser = config["dbUser"] - if "dbPass" in config: - self.dbPass = config["dbPass"] - if "outputDir" in config: - self.outputDir = config["outputDir"] - if "defaultNbToExport" in config: - self.defaultNbToExport = config["defaultNbToExport"] + if "db_user" in config: + self.db_user = config["db_user"] + if "db_pass" in config: + self.db_pass = config["db_pass"] + if "output_dir" in config: + self.output_dir = config["output_dir"] + if "default_export_nb" in config: + self.default_export_nb = config["default_export_nb"] config = Configuration() -for path in configPaths: +for path in config_paths: if isfile(path): config = Configuration(path) break diff --git a/spip2md/converter.py b/spip2md/converter.py index 2e44cdf..f17c726 100644 --- a/spip2md/converter.py +++ b/spip2md/converter.py @@ -2,7 +2,7 @@ from re import I, S, compile, finditer # SPIP syntax to Markdown -spipToMarkdown = ( +spip_to_markdown = ( ( # horizontal rule compile(r"- ?- ?- ?- ?[\- ]*|
", S | I), # r"---", @@ -114,7 +114,7 @@ spipToMarkdown = ( ), ) -spipToText = ( +spip_to_text = ( ( # strong compile(r"\{\{ *(.*?) *\}\}", S | I), r"\1", @@ -159,7 +159,7 @@ spipToText = ( ), ) -isoToUtf = ( +iso_to_utf = ( # Broken encoding ( # Fix UTF-8 appostrophe that was interpreted as ISO 8859-1 "’", @@ -253,44 +253,47 @@ isoToUtf = ( ) ## WARNING unknown broken encoding -unknownIso = ( +unknown_iso = ( r"
", # unknown 
 r"∆", # unknown â^† ) +# Define terminal escape sequences to stylize output, regex escaped +RED: str = "\033[91m" +BOLD: str = "\033[1m" +RESET: str = "\033[0m" -def convertBody(text: str) -> str: - for spip, markdown in spipToMarkdown: + +def convert_body(text: str) -> str: + for spip, markdown in spip_to_markdown: text = spip.sub(markdown, text) - for iso, utf in isoToUtf: + for iso, utf in iso_to_utf: text.replace(iso, utf) return text -def convertMeta(text: str) -> str: - for spip, metadata in spipToText: +def convert_meta(text: str) -> str: + for spip, metadata in spip_to_text: text = spip.sub(metadata, text) - for iso, utf in isoToUtf: + for iso, utf in iso_to_utf: text.replace(iso, utf) return text -def removeUnknownChars(text: str) -> str: - for char in unknownIso: +def remove_unknown_chars(text: str) -> str: + for char in unknown_iso: text.replace(char, "") return text -def highlightUnknownChars(text: str) -> str: - # Define terminal escape sequences to stylize output, regex escaped - COLOR: str = "\033[91m" + "\033[1m" # Red + Bold - RESET: str = "\033[0m" +def highlight_unknown_chars(text: str) -> str: # Highlight in COLOR unknown chars in text - for char in unknownIso: + for char in unknown_iso: for match in finditer(char, text): text = ( text[: match.start()] - + COLOR + + RED + + BOLD + match.group() + RESET + text[match.end() :] diff --git a/spip2md/database.py b/spip2md/database.py index 6360692..3e2431a 100644 --- a/spip2md/database.py +++ b/spip2md/database.py @@ -1,7 +1,18 @@ # pyright: basic -from peewee import (SQL, BigAutoField, BigIntegerField, CharField, - CompositeKey, DateField, DateTimeField, FloatField, - IntegerField, Model, MySQLDatabase, TextField) +from peewee import ( + SQL, + BigAutoField, + BigIntegerField, + CharField, + CompositeKey, + DateField, + DateTimeField, + FloatField, + IntegerField, + Model, + MySQLDatabase, + TextField, +) # class UnknownField(object): # def __init__(self, *_, **__): diff --git a/spip2md/main.py b/spip2md/main.py index 4a054e8..b96d1b6 100755 --- a/spip2md/main.py +++ b/spip2md/main.py @@ -2,7 +2,7 @@ # pyright: basic from articles import Article, Articles from config import config -from converter import highlightUnknownChars +from converter import highlight_unknown_chars from database import db if __name__ != "__main__": @@ -13,18 +13,18 @@ from os import makedirs, mkdir from shutil import rmtree # Clean the output dir & create a new -rmtree(config.outputDir, True) -mkdir(config.outputDir) +rmtree(config.output_dir, True) +mkdir(config.output_dir) # Connect to the MySQL database with Peewee ORM -db.init(config.db, host=config.dbHost, user=config.dbUser, password=config.dbPass) +db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass) db.connect() # Define max nb of articles to export based on first CLI param if len(sys.argv) > 1: - maxToExport = int(sys.argv[1]) + maxexport = int(sys.argv[1]) else: - maxToExport = config.defaultNbToExport + maxexport = config.default_export_nb # Define terminal escape sequences to stylize output R: str = "\033[91m" @@ -34,38 +34,38 @@ BOLD: str = "\033[1m" RESET: str = "\033[0m" # Articles that contains unknown chars -unknownCharsArticles: list[Article] = [] +unknown_chars_articles: list[Article] = [] # Loop among first maxToExport articles & export them -for counter, article in Articles(maxToExport): +for counter, article in Articles(maxexport): if (counter["exported"] - 1) % 100 == 0: print( f"\n{BOLD}Exporting {R}{counter['remaining']+1}{RESET}" + f"{BOLD} SPIP articles to Markdown & YAML files{RESET}\n" ) print( - f"{BOLD}{counter['exported']}.{RESET} " + highlightUnknownChars(article.title) + f"{BOLD}{counter['exported']}.{RESET} " + highlight_unknown_chars(article.title) ) - fullPath: str = config.outputDir + "/" + article.getPath() - print(f"{BOLD}>{RESET} {fullPath}{article.getFilename()}") - makedirs(fullPath, exist_ok=True) - with open(fullPath + article.getFilename(), "w") as f: - f.write(article.getArticle()) + fullpath: str = config.output_dir + "/" + article.get_path() + print(f"{BOLD}>{RESET} {fullpath}{article.get_filename()}") + makedirs(fullpath, exist_ok=True) + with open(fullpath + article.get_filename(), "w") as f: + f.write(article.get_article()) # Store detected unknown characters - if len(article.getUnknownChars()) > 0: - unknownCharsArticles.append(article) + if len(article.get_unknown_chars()) > 0: + unknown_chars_articles.append(article) -for article in unknownCharsArticles: - unknownCharsApparitions: list = article.getUnknownChars() - nb: int = len(unknownCharsApparitions) +for article in unknown_chars_articles: + unknown_chars_apparitions: list = article.get_unknown_chars() + nb: int = len(unknown_chars_apparitions) s: str = "s" if nb > 1 else "" print( f"\n{BOLD}{nb}{RESET} unknown character{s} " + f"detected in article {BOLD}{article.id}{RESET}" + f"\n{BOLD}·{RESET} " - + highlightUnknownChars(article.title) + + highlight_unknown_chars(article.title) ) - for text in unknownCharsApparitions: - print(f" {BOLD}…{RESET} " + highlightUnknownChars(text)) + for text in unknown_chars_apparitions: + print(f" {BOLD}…{RESET} " + highlight_unknown_chars(text)) db.close() # Close the database connection