Fix encoding replacement: assign the result of text.replace() back to text
This commit is contained in:
parent
e1c8bd4b2e
commit
619376003f
@ -258,6 +258,7 @@ unknown_iso = (
|
|||||||
r"∆", # unknown â^†
|
r"∆", # unknown â^†
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Define terminal escape sequences to stylize output, regex escaped
|
# Define terminal escape sequences to stylize output, regex escaped
|
||||||
RED: str = "\033[91m"
|
RED: str = "\033[91m"
|
||||||
BOLD: str = "\033[1m"
|
BOLD: str = "\033[1m"
|
||||||
@ -268,7 +269,7 @@ def convert_body(text: str) -> str:
|
|||||||
for spip, markdown in spip_to_markdown:
|
for spip, markdown in spip_to_markdown:
|
||||||
text = spip.sub(markdown, text)
|
text = spip.sub(markdown, text)
|
||||||
for iso, utf in iso_to_utf:
|
for iso, utf in iso_to_utf:
|
||||||
text.replace(iso, utf)
|
text = text.replace(iso, utf)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
@ -276,7 +277,7 @@ def convert_meta(text: str) -> str:
|
|||||||
for spip, metadata in spip_to_text:
|
for spip, metadata in spip_to_text:
|
||||||
text = spip.sub(metadata, text)
|
text = spip.sub(metadata, text)
|
||||||
for iso, utf in iso_to_utf:
|
for iso, utf in iso_to_utf:
|
||||||
text.replace(iso, utf)
|
text = text.replace(iso, utf)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
@ -289,11 +290,11 @@ def remove_unknown_chars(text: str) -> str:
|
|||||||
def highlight_unknown_chars(text: str) -> str:
|
def highlight_unknown_chars(text: str) -> str:
|
||||||
# Highlight in COLOR unknown chars in text
|
# Highlight in COLOR unknown chars in text
|
||||||
for char in unknown_iso:
|
for char in unknown_iso:
|
||||||
for match in finditer(char, text):
|
for match in finditer("(" + char + ")+", text):
|
||||||
text = (
|
text = (
|
||||||
text[: match.start()]
|
text[: match.start()]
|
||||||
+ RED
|
+ RED
|
||||||
+ BOLD
|
# + BOLD
|
||||||
+ match.group()
|
+ match.group()
|
||||||
+ RESET
|
+ RESET
|
||||||
+ text[match.end() :]
|
+ text[match.end() :]
|
||||||
|
@ -62,7 +62,7 @@ for article in unknown_chars_articles:
|
|||||||
print(
|
print(
|
||||||
f"\n{BOLD}{nb}{RESET} unknown character{s} "
|
f"\n{BOLD}{nb}{RESET} unknown character{s} "
|
||||||
+ f"detected in article {BOLD}{article.id}{RESET}"
|
+ f"detected in article {BOLD}{article.id}{RESET}"
|
||||||
+ f"\n{BOLD}·{RESET} "
|
+ f"\n{BOLD}Title:{RESET} "
|
||||||
+ highlight_unknown_chars(article.title)
|
+ highlight_unknown_chars(article.title)
|
||||||
)
|
)
|
||||||
for text in unknown_chars_apparitions:
|
for text in unknown_chars_apparitions:
|
||||||
|
Loading…
Reference in New Issue
Block a user