From b3fa5023c4feae49fb500f6a12b8f9d4c80f1bba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilhem=20Faur=C3=A9?= Date: Thu, 11 May 2023 10:33:35 +0200 Subject: [PATCH] fix some encoding bugs with regex replace --- spip2md/convert.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/spip2md/convert.py b/spip2md/convert.py index 116ee4b..81a3e8a 100644 --- a/spip2md/convert.py +++ b/spip2md/convert.py @@ -97,10 +97,23 @@ mappings = ( ), r"\1", ), + ( # Fix UTF-8 apostrophe that was interpreted as ISO 8859-1 and saved like so + re.compile("’"), + r"’", + ), + ( # Fix UTF-8 é that was interpreted as ISO 8859-1 and saved like so + re.compile("eÌ\u0081"), + r"é", + ), + ( # Fix UTF-8 à that was interpreted as ISO 8859-1 and saved like so + re.compile("aÌ€"), + r"à", + ), ) def convert(markup): for spip, markdown in mappings: markup = spip.sub(markdown, markup) - return markup.encode("utf-8").decode("utf-8") + # return markup.encode("utf-8").decode("utf-8") + return markup