fix some encoding bugs with regex replace

This commit is contained in:
Guilhem Fauré 2023-05-11 10:33:35 +02:00
parent 5c78dcd753
commit b3fa5023c4

View File

@@ -97,10 +97,23 @@ mappings = (
),
r"\1",
),
( # Fix UTF-8 apostrophe that was interpreted as ISO 8859-1 and saved like so
re.compile("’"),
r"",
),
( # Fix UTF-8 é that was interpreted as ISO 8859-1 and saved like so
re.compile("\u0081"),
r"é",
),
( # Fix UTF-8 à that was interpreted as ISO 8859-1 and saved like so
re.compile("à"),
r"à",
),
)
def convert(markup: str) -> str:
    """Apply every (pattern, replacement) pair in ``mappings`` to *markup*.

    The pairs are applied one after another, in the order ``mappings`` lists
    them, each substitution working on the output of the previous one.

    Args:
        markup: Text to rewrite.

    Returns:
        The text after all substitutions have been applied.
    """
    for pattern, replacement in mappings:
        markup = pattern.sub(replacement, markup)
    # No UTF-8 encode/decode round trip here: on a str it can only hand back
    # the same text (or raise on lone surrogates). Mis-decoded sequences are
    # handled by the regex entries in ``mappings`` instead.
    return markup