fixed most of the encoding bugs
This commit is contained in:
parent
b3fa5023c4
commit
995fee5b6a
@ -1,6 +1,7 @@
|
||||
import re
|
||||
|
||||
mappings = (
|
||||
# SPIP syntax to Markdown
|
||||
( # horizontal-rule
|
||||
re.compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", re.S | re.I),
|
||||
# r"---",
|
||||
@ -97,18 +98,95 @@ mappings = (
|
||||
),
|
||||
r"\1",
|
||||
),
|
||||
# Broken encoding
|
||||
( # Fix UTF-8 apostrophe that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("’"),
|
||||
r"’",
|
||||
),
|
||||
( # Fix UTF-8 ‘ that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("‘"),
|
||||
r"‘",
|
||||
),
|
||||
( # Fix UTF-8 é that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("eÌ\u0081"),
|
||||
r"é",
|
||||
),
|
||||
( # Fix UTF-8 è that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("è"),
|
||||
r"è",
|
||||
),
|
||||
( # Fix UTF-8 ê that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("ê"),
|
||||
r"ê",
|
||||
),
|
||||
( # Fix UTF-8 ô that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("ô"),
|
||||
r"ô",
|
||||
),
|
||||
( # Fix UTF-8 î that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("î"),
|
||||
r"î",
|
||||
),
|
||||
( # Fix UTF-8 ï that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("ï"),
|
||||
r"ï",
|
||||
),
|
||||
( # Fix UTF-8 ö that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("ö"),
|
||||
r"ö",
|
||||
),
|
||||
( # Fix UTF-8 ü that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("ü"),
|
||||
r"ü",
|
||||
),
|
||||
( # WARNING Fix UTF-8 é ? that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("eÌ "),
|
||||
r"é",
|
||||
),
|
||||
( # Fix UTF-8 à that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("à"),
|
||||
r"à",
|
||||
),
|
||||
( # Fix UTF-8 … that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("…"),
|
||||
r"…",
|
||||
),
|
||||
( # Fix UTF-8 “ that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("“"),
|
||||
r"“",
|
||||
),
|
||||
( # Fix UTF-8 ” that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("â€\u009d"),
|
||||
r"”",
|
||||
),
|
||||
( # Fix UTF-8 – that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("–"),
|
||||
r"–",
|
||||
),
|
||||
( # Fix UTF-8 — that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("—"),
|
||||
r"—",
|
||||
),
|
||||
( # Fix UTF-8 − that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("â€\u0090"),
|
||||
r"−",
|
||||
),
|
||||
( # Fix UTF-8 • that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("•"),
|
||||
r"•",
|
||||
),
|
||||
( # Fix UTF-8 † that was interpreted as ISO 8859-1 and saved like so
|
||||
re.compile("†"),
|
||||
r"† ",
|
||||
),
|
||||
( # Delete unknown 

|
||||
re.compile("
"),
|
||||
r"",
|
||||
),
|
||||
( # Delete unknown Ì\u0081
|
||||
re.compile("Ì\u0081"),
|
||||
r"",
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user