Fix encoding replacement: assign the result of str.replace() back to text

This commit is contained in:
Guilhem Fauré 2023-05-16 16:08:03 +02:00
parent e1c8bd4b2e
commit 619376003f
2 changed files with 6 additions and 5 deletions

View File

@@ -258,6 +258,7 @@ unknown_iso = (
r"∆", # unknown â^†
)
# Define terminal escape sequences to stylize output, regex escaped
RED: str = "\033[91m"
BOLD: str = "\033[1m"
@@ -268,7 +269,7 @@ def convert_body(text: str) -> str:
for spip, markdown in spip_to_markdown:
text = spip.sub(markdown, text)
for iso, utf in iso_to_utf:
- text.replace(iso, utf)
+ text = text.replace(iso, utf)
return text
@@ -276,7 +277,7 @@ def convert_meta(text: str) -> str:
for spip, metadata in spip_to_text:
text = spip.sub(metadata, text)
for iso, utf in iso_to_utf:
- text.replace(iso, utf)
+ text = text.replace(iso, utf)
return text
@@ -289,11 +290,11 @@ def remove_unknown_chars(text: str) -> str:
def highlight_unknown_chars(text: str) -> str:
# Highlight in COLOR unknown chars in text
for char in unknown_iso:
- for match in finditer(char, text):
+ for match in finditer("(" + char + ")+", text):
text = (
text[: match.start()]
+ RED
- + BOLD
+ # + BOLD
+ match.group()
+ RESET
+ text[match.end() :]

View File

@@ -62,7 +62,7 @@ for article in unknown_chars_articles:
print(
f"\n{BOLD}{nb}{RESET} unknown character{s} "
+ f"detected in article {BOLD}{article.id}{RESET}"
- + f"\n{BOLD}·{RESET} "
+ + f"\n{BOLD}Title:{RESET} "
+ highlight_unknown_chars(article.title)
)
for text in unknown_chars_apparitions: