Convert SPIP markup to Markdown using regex replacements
This commit is contained in:
parent
8a6026d129
commit
cf2345e43e
@ -1,6 +1,3 @@
|
|||||||
pyyaml
|
pyyaml
|
||||||
python-slugify[unidecode]
|
python-slugify[unidecode]
|
||||||
peewee
|
peewee
|
||||||
|
|
||||||
# pyparsing
|
|
||||||
# lark
|
|
||||||
|
@ -1,36 +1,109 @@
|
|||||||
import re
|
import re
|
||||||
from os import path
|
from os import path
|
||||||
|
|
||||||
# from lark import Lark
|
|
||||||
# from pyparsing import Word, alphas
|
|
||||||
|
|
||||||
# larkParser = Lark(open(path.dirname(__file__) + "/spip.lark"))
|
|
||||||
|
|
||||||
|
|
||||||
class content:
|
class content:
|
||||||
_mappings = (
|
_mappings = {
|
||||||
(re.compile(r"\{\{\{(.*?)\}\}\}", re.S | re.I), r"## \1"),
|
"horizontal-rule": (
|
||||||
(re.compile(r"\{\{ \{(.*?)\} \}\}", re.S | re.I), r"***\1***"),
|
re.compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", re.S | re.I),
|
||||||
(re.compile(r"\{ \{\{(.*?)\}\} \}", re.S | re.I), r"***\1***"),
|
r"---",
|
||||||
(re.compile(r"\{\{(.*?)\}\}", re.S | re.I), r"**\1**"),
|
),
|
||||||
(re.compile(r"\{(.*?)\}", re.S | re.I), r"*\1*"),
|
"line-break": (
|
||||||
)
|
re.compile(r"\r?\n_ *(?=\r?\n)|<br ?.*?>", re.S | re.I),
|
||||||
|
"\n",
|
||||||
|
),
|
||||||
|
"heading": (
|
||||||
|
re.compile(r"\{\{\{ *(.*?) *\}\}\}", re.S | re.I),
|
||||||
|
r"## \1",
|
||||||
|
),
|
||||||
|
"strong": (
|
||||||
|
re.compile(r"\{\{ *(.*?) *\}\}", re.S | re.I),
|
||||||
|
r"**\1**",
|
||||||
|
),
|
||||||
|
"emphasis": (
|
||||||
|
re.compile(r"\{ *(.*?) *\}", re.S | re.I),
|
||||||
|
r"*\1*",
|
||||||
|
),
|
||||||
|
"strikethrough": (
|
||||||
|
re.compile(
|
||||||
|
r"<del>\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)",
|
||||||
|
re.S | re.I,
|
||||||
|
),
|
||||||
|
r"~\1~",
|
||||||
|
),
|
||||||
|
"anchor": (
|
||||||
|
re.compile(r"\[ *(.*?) *-> *(.*?) *\]", re.S | re.I),
|
||||||
|
r"[\1](\2)",
|
||||||
|
),
|
||||||
|
"image": (
|
||||||
|
re.compile(r"<(?:img|image)(.*?)(\|.*?)*>", re.S | re.I),
|
||||||
|
r"![image](\1)",
|
||||||
|
),
|
||||||
|
"document-anchors": (
|
||||||
|
re.compile(r"<(?:doc|emb)(.*?)(\|.*?)*>", re.S | re.I),
|
||||||
|
r"[document](\1)",
|
||||||
|
),
|
||||||
|
"wikilink": (
|
||||||
|
re.compile(r"\[\? *(.*?) *\]", re.S | re.I),
|
||||||
|
r"[\1](https://wikipedia.org/wiki/\1)",
|
||||||
|
),
|
||||||
|
"footnote": (
|
||||||
|
re.compile(r"\[\[ *(.*?) *\]\]", re.S | re.I),
|
||||||
|
r"",
|
||||||
|
),
|
||||||
|
"unordered-list": (
|
||||||
|
re.compile(r"(\r?\n)-(?!#|-)\*? *", re.S | re.I),
|
||||||
|
r"\1- ",
|
||||||
|
),
|
||||||
|
"wrong-unordered-list": (
|
||||||
|
re.compile(r"(\r?\n)\* +", re.S | re.I),
|
||||||
|
r"\1- ",
|
||||||
|
),
|
||||||
|
"ordered-list": (
|
||||||
|
re.compile(r"(\r?\n)-# *", re.S | re.I),
|
||||||
|
r"\g<1>1. ",
|
||||||
|
),
|
||||||
|
"table-metadata": (
|
||||||
|
re.compile(r"(\r?\n)\|\|(.*?)\|(.*?)\|\|", re.S | re.I),
|
||||||
|
r"",
|
||||||
|
),
|
||||||
|
"quote": (
|
||||||
|
re.compile(
|
||||||
|
r"<(?:quote|poesie)>\s*(.*?)\s*(?:(\r?\n){2,}|<\/(?:quote|poesie)>)",
|
||||||
|
re.S | re.I,
|
||||||
|
),
|
||||||
|
r"> \1\2\2",
|
||||||
|
),
|
||||||
|
"box": (
|
||||||
|
re.compile(
|
||||||
|
r"<code>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/code>)",
|
||||||
|
re.S | re.I,
|
||||||
|
),
|
||||||
|
"`\\1`",
|
||||||
|
),
|
||||||
|
"fence": (
|
||||||
|
re.compile(
|
||||||
|
r"<cadre>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/cadre>)",
|
||||||
|
re.S | re.I,
|
||||||
|
),
|
||||||
|
"```\n\\1\n\n```",
|
||||||
|
),
|
||||||
|
"multi-language": ( # Keep only the first language
|
||||||
|
re.compile(
|
||||||
|
r"<multi>\s*\[.{2,4}\]\s*(.*?)\s*(?:\s*\[.{2,4}\].*)*<\/multi>",
|
||||||
|
re.S | re.I,
|
||||||
|
),
|
||||||
|
r"\1",
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
def __init__(self, content):
|
def __init__(self, spip):
|
||||||
self.spip = content
|
self.markup = spip
|
||||||
|
|
||||||
def get_markdown(self):
|
def get_markdown(self):
|
||||||
markdown = self.spip
|
for spip, markdown in self._mappings.values():
|
||||||
for spip, md in self._mappings:
|
self.markup = spip.sub(markdown, self.markup)
|
||||||
markdown = spip.sub(md, markdown)
|
return self.markup
|
||||||
return markdown
|
|
||||||
# Parses the body & displays the parse tree
|
|
||||||
try:
|
|
||||||
print(f" parse tree :\n")
|
|
||||||
print(larkParser.parse(self.spip).pretty())
|
|
||||||
except Exception as e:
|
|
||||||
print(" PARSING FAILED :\n", e)
|
|
||||||
return markdown
|
|
||||||
|
|
||||||
|
|
||||||
# Parses a file & displays its parse tree
|
# Parses a file & displays its parse tree
|
||||||
@ -40,14 +113,12 @@ def test(filename):
|
|||||||
print(f"--- Conversion of {filename} ---\n\n")
|
print(f"--- Conversion of {filename} ---\n\n")
|
||||||
c = content(raw)
|
c = content(raw)
|
||||||
print(c.get_markdown())
|
print(c.get_markdown())
|
||||||
# print(f"--- Parse tree of {filename} ---\n\n")
|
|
||||||
# print(larkParser.parse(raw))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# Test
|
# Test
|
||||||
test("../test/0.spip")
|
test("../test/0.spip")
|
||||||
# test("../test/1.spip")
|
test("../test/1.spip")
|
||||||
# test("../test/2.spip")
|
test("../test/2.spip")
|
||||||
# test("../test/3.spip")
|
test("../test/3.spip")
|
||||||
# test("../test/4.spip")
|
test("../test/4.spip")
|
||||||
|
Loading…
Reference in New Issue
Block a user