refactor
This commit is contained in:
parent
df0b623383
commit
8021bd395e
@ -258,12 +258,6 @@ unknown_iso = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Define terminal escape sequences to stylize output, regex escaped
|
|
||||||
RED: str = "\033[91m"
|
|
||||||
BOLD: str = "\033[1m"
|
|
||||||
RESET: str = "\033[0m"
|
|
||||||
|
|
||||||
|
|
||||||
def convert_body(text: str) -> str:
|
def convert_body(text: str) -> str:
|
||||||
for spip, markdown in spip_to_markdown:
|
for spip, markdown in spip_to_markdown:
|
||||||
text = spip.sub(markdown, text)
|
text = spip.sub(markdown, text)
|
||||||
@ -286,16 +280,11 @@ def remove_unknown_chars(text: str) -> str:
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def highlight_unknown_chars(text: str) -> str:
|
def highlight_unknown_chars(text: str, pre: str, post: str) -> str:
|
||||||
# Highlight in COLOR unknown chars in text
|
# Add pre before unknown char and post after unknown char
|
||||||
for char in unknown_iso:
|
for char in unknown_iso:
|
||||||
for match in finditer("(" + char + ")+", text):
|
for match in finditer("(" + char + ")+", text):
|
||||||
text = (
|
text = (
|
||||||
text[: match.start()]
|
text[: match.start()] + pre + match.group() + post + text[match.end() :]
|
||||||
+ RED
|
|
||||||
# + BOLD
|
|
||||||
+ match.group()
|
|
||||||
+ RESET
|
|
||||||
+ text[match.end() :]
|
|
||||||
)
|
)
|
||||||
return text
|
return text
|
||||||
|
@ -9,21 +9,6 @@ from config import config
|
|||||||
from converter import highlight_unknown_chars
|
from converter import highlight_unknown_chars
|
||||||
from database import db
|
from database import db
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
# Clean the output dir & create a new
|
|
||||||
rmtree(config.output_dir, True)
|
|
||||||
mkdir(config.output_dir)
|
|
||||||
|
|
||||||
# Connect to the MySQL database with Peewee ORM
|
|
||||||
db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
|
|
||||||
db.connect()
|
|
||||||
|
|
||||||
# Define max nb of articles to export based on first CLI param
|
|
||||||
if len(sys.argv) > 1:
|
|
||||||
maxexport = int(sys.argv[1])
|
|
||||||
else:
|
|
||||||
maxexport = config.default_export_nb
|
|
||||||
|
|
||||||
# Define terminal escape sequences to stylize output
|
# Define terminal escape sequences to stylize output
|
||||||
R: str = "\033[91m"
|
R: str = "\033[91m"
|
||||||
G: str = "\033[92m"
|
G: str = "\033[92m"
|
||||||
@ -31,10 +16,24 @@ B: str = "\033[94m"
|
|||||||
BOLD: str = "\033[1m"
|
BOLD: str = "\033[1m"
|
||||||
RESET: str = "\033[0m"
|
RESET: str = "\033[0m"
|
||||||
|
|
||||||
|
# Connect to the MySQL database with Peewee ORM
|
||||||
|
db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
|
||||||
|
db.connect()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# Define max nb of articles to export based on first CLI param
|
||||||
|
if len(sys.argv) > 1:
|
||||||
|
maxexport = int(sys.argv[1])
|
||||||
|
else:
|
||||||
|
maxexport = config.default_export_nb
|
||||||
|
|
||||||
|
# Clean the output dir & create a new
|
||||||
|
rmtree(config.output_dir, True)
|
||||||
|
mkdir(config.output_dir)
|
||||||
|
|
||||||
# Articles that contains unknown chars
|
# Articles that contains unknown chars
|
||||||
unknown_chars_articles: list[Article] = []
|
unknown_chars_articles: list[Article] = []
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
# Loop among first maxexport articles & export them
|
# Loop among first maxexport articles & export them
|
||||||
for counter, article in Articles(maxexport):
|
for counter, article in Articles(maxexport):
|
||||||
if (counter["exported"] - 1) % 100 == 0:
|
if (counter["exported"] - 1) % 100 == 0:
|
||||||
@ -45,7 +44,7 @@ if __name__ == "__main__":
|
|||||||
empty: str = "EMPTY " if len(article.text) < 1 else ""
|
empty: str = "EMPTY " if len(article.text) < 1 else ""
|
||||||
print(
|
print(
|
||||||
f"{BOLD}{counter['exported']}. {empty}{RESET}"
|
f"{BOLD}{counter['exported']}. {empty}{RESET}"
|
||||||
+ highlight_unknown_chars(article.title)
|
+ highlight_unknown_chars(article.title, R, RESET)
|
||||||
)
|
)
|
||||||
fullpath: str = config.output_dir + "/" + article.get_path()
|
fullpath: str = config.output_dir + "/" + article.get_path()
|
||||||
print(f"{BOLD}>{RESET} {fullpath}{article.get_filename()}")
|
print(f"{BOLD}>{RESET} {fullpath}{article.get_filename()}")
|
||||||
@ -62,9 +61,9 @@ if __name__ == "__main__":
|
|||||||
s: str = "s" if nb > 1 else ""
|
s: str = "s" if nb > 1 else ""
|
||||||
print(
|
print(
|
||||||
f"\n{BOLD}{nb}{RESET} unknown character{s} in {BOLD}{article.lang}{RESET} "
|
f"\n{BOLD}{nb}{RESET} unknown character{s} in {BOLD}{article.lang}{RESET} "
|
||||||
+ highlight_unknown_chars(article.title)
|
+ highlight_unknown_chars(article.title, R, RESET)
|
||||||
)
|
)
|
||||||
for text in unknown_chars_apparitions:
|
for text in unknown_chars_apparitions:
|
||||||
print(f" {BOLD}…{RESET} " + highlight_unknown_chars(text))
|
print(f" {BOLD}…{RESET} " + highlight_unknown_chars(text, R, RESET))
|
||||||
|
|
||||||
db.close() # Close the database connection
|
db.close() # Close the database connection
|
||||||
|
Loading…
Reference in New Issue
Block a user