From 8021bd395ef1c509fa0b53d587900db1c7d571c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilhem=20Faur=C3=A9?= Date: Wed, 17 May 2023 12:04:04 +0200 Subject: [PATCH] refactor --- spip2md/converter.py | 17 +++-------------- spip2md/main.py | 41 ++++++++++++++++++++--------------------- 2 files changed, 23 insertions(+), 35 deletions(-) diff --git a/spip2md/converter.py b/spip2md/converter.py index 93d48a6..c318ef8 100644 --- a/spip2md/converter.py +++ b/spip2md/converter.py @@ -258,12 +258,6 @@ unknown_iso = ( ) -# Define terminal escape sequences to stylize output, regex escaped -RED: str = "\033[91m" -BOLD: str = "\033[1m" -RESET: str = "\033[0m" - - def convert_body(text: str) -> str: for spip, markdown in spip_to_markdown: text = spip.sub(markdown, text) @@ -286,16 +280,11 @@ def remove_unknown_chars(text: str) -> str: return text -def highlight_unknown_chars(text: str) -> str: - # Highlight in COLOR unknown chars in text +def highlight_unknown_chars(text: str, pre: str, post: str) -> str: + # Add pre before unknown char and post after unknown char for char in unknown_iso: for match in finditer("(" + char + ")+", text): text = ( - text[: match.start()] - + RED - # + BOLD - + match.group() - + RESET - + text[match.end() :] + text[: match.start()] + pre + match.group() + post + text[match.end() :] ) return text diff --git a/spip2md/main.py b/spip2md/main.py index fd28197..20eea96 100755 --- a/spip2md/main.py +++ b/spip2md/main.py @@ -9,21 +9,6 @@ from config import config from converter import highlight_unknown_chars from database import db -if __name__ == "__main__": - # Clean the output dir & create a new - rmtree(config.output_dir, True) - mkdir(config.output_dir) - -# Connect to the MySQL database with Peewee ORM -db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass) -db.connect() - -# Define max nb of articles to export based on first CLI param -if len(sys.argv) > 1: - maxexport = int(sys.argv[1]) -else: - maxexport = config.default_export_nb - # Define terminal escape sequences to stylize output R: str = "\033[91m" G: str = "\033[92m" @@ -31,10 +16,24 @@ B: str = "\033[94m" BOLD: str = "\033[1m" RESET: str = "\033[0m" -# Articles that contains unknown chars -unknown_chars_articles: list[Article] = [] +# Connect to the MySQL database with Peewee ORM +db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass) +db.connect() if __name__ == "__main__": + # Define max nb of articles to export based on first CLI param + if len(sys.argv) > 1: + maxexport = int(sys.argv[1]) + else: + maxexport = config.default_export_nb + + # Clean the output dir & create a new + rmtree(config.output_dir, True) + mkdir(config.output_dir) + + # Articles that contains unknown chars + unknown_chars_articles: list[Article] = [] + # Loop among first maxexport articles & export them for counter, article in Articles(maxexport): if (counter["exported"] - 1) % 100 == 0: @@ -45,7 +44,7 @@ if __name__ == "__main__": empty: str = "EMPTY " if len(article.text) < 1 else "" print( f"{BOLD}{counter['exported']}. {empty}{RESET}" - + highlight_unknown_chars(article.title) + + highlight_unknown_chars(article.title, R, RESET) ) fullpath: str = config.output_dir + "/" + article.get_path() print(f"{BOLD}>{RESET} {fullpath}{article.get_filename()}") @@ -62,9 +61,9 @@ if __name__ == "__main__": s: str = "s" if nb > 1 else "" print( f"\n{BOLD}{nb}{RESET} unknown character{s} in {BOLD}{article.lang}{RESET} " - + highlight_unknown_chars(article.title) + + highlight_unknown_chars(article.title, R, RESET) ) for text in unknown_chars_apparitions: - print(f" {BOLD}…{RESET} " + highlight_unknown_chars(text)) + print(f" {BOLD}…{RESET} " + highlight_unknown_chars(text, R, RESET)) - db.close() # Close the database connection + db.close() # Close the database connection