This commit is contained in:
Guilhem Fauré 2023-05-17 12:04:04 +02:00
parent df0b623383
commit 8021bd395e
2 changed files with 23 additions and 35 deletions

View File

@ -258,12 +258,6 @@ unknown_iso = (
)
# Define terminal escape sequences to stylize output, regex escaped
RED: str = "\033[91m"
BOLD: str = "\033[1m"
RESET: str = "\033[0m"
def convert_body(text: str) -> str:
for spip, markdown in spip_to_markdown:
text = spip.sub(markdown, text)
@ -286,16 +280,11 @@ def remove_unknown_chars(text: str) -> str:
return text
def highlight_unknown_chars(text: str, pre: str, post: str) -> str:
    """Surround every run of an unknown character in ``text`` with markers.

    Each entry of the module-level ``unknown_iso`` is used as a regular
    expression fragment; every run of one or more consecutive occurrences
    of it is rewritten as ``pre + run + post``.

    Args:
        text: The string to scan.
        pre: Marker inserted immediately before each run.
        post: Marker inserted immediately after each run.

    Returns:
        A new string with the markers inserted; ``text`` itself is returned
        unchanged in content when nothing matches.
    """
    for char in unknown_iso:
        # Rebuild the string from slices of the *unmodified* text. Splicing
        # into ``text`` while iterating ``finditer`` would apply offsets of
        # the original string to an already lengthened one, misplacing every
        # marker after the first match.
        pieces: list[str] = []
        cursor: int = 0
        for match in finditer("(" + char + ")+", text):
            pieces.append(text[cursor : match.start()])
            pieces.append(pre + match.group() + post)
            cursor = match.end()
        pieces.append(text[cursor:])
        # NOTE: markers inserted for this char are part of the text scanned
        # for the following chars, exactly as in the previous implementation.
        text = "".join(pieces)
    return text

View File

@ -9,21 +9,6 @@ from config import config
from converter import highlight_unknown_chars
from database import db
if __name__ == "__main__":
# Clean the output dir & create a new one
rmtree(config.output_dir, True)
mkdir(config.output_dir)
# Connect to the MySQL database with Peewee ORM
db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
db.connect()
# Define max nb of articles to export based on first CLI param
if len(sys.argv) > 1:
maxexport = int(sys.argv[1])
else:
maxexport = config.default_export_nb
# Define terminal escape sequences to stylize output
R: str = "\033[91m"
G: str = "\033[92m"
@ -31,10 +16,24 @@ B: str = "\033[94m"
BOLD: str = "\033[1m"
RESET: str = "\033[0m"
# Connect to the MySQL database with Peewee ORM
db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
db.connect()
if __name__ == "__main__":
# Define max nb of articles to export based on first CLI param
if len(sys.argv) > 1:
maxexport = int(sys.argv[1])
else:
maxexport = config.default_export_nb
# Clean the output dir & create a new one
rmtree(config.output_dir, True)
mkdir(config.output_dir)
# Articles that contain unknown chars
unknown_chars_articles: list[Article] = []
if __name__ == "__main__":
# Loop over the first maxexport articles & export them
for counter, article in Articles(maxexport):
if (counter["exported"] - 1) % 100 == 0:
@ -45,7 +44,7 @@ if __name__ == "__main__":
empty: str = "EMPTY " if len(article.text) < 1 else ""
print(
f"{BOLD}{counter['exported']}. {empty}{RESET}"
+ highlight_unknown_chars(article.title)
+ highlight_unknown_chars(article.title, R, RESET)
)
fullpath: str = config.output_dir + "/" + article.get_path()
print(f"{BOLD}>{RESET} {fullpath}{article.get_filename()}")
@ -62,9 +61,9 @@ if __name__ == "__main__":
s: str = "s" if nb > 1 else ""
print(
f"\n{BOLD}{nb}{RESET} unknown character{s} in {BOLD}{article.lang}{RESET} "
+ highlight_unknown_chars(article.title)
+ highlight_unknown_chars(article.title, R, RESET)
)
for text in unknown_chars_apparitions:
print(f" {BOLD}{RESET} " + highlight_unknown_chars(text))
print(f" {BOLD}{RESET} " + highlight_unknown_chars(text, R, RESET))
db.close() # Close the database connection