From cf2345e43e65c30c561dbf690684ef1d247769c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilhem=20Faur=C3=A9?= Date: Wed, 10 May 2023 11:00:27 +0200 Subject: [PATCH] regex replacing spip to markdown conversion --- requirements.txt | 3 - spip2md/content.py | 133 ++++++++++++++++++++++++++++++++++----------- 2 files changed, 102 insertions(+), 34 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8ec22fc..2a1c723 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,3 @@ pyyaml python-slugify[unidecode] peewee - -# pyparsing -# lark diff --git a/spip2md/content.py b/spip2md/content.py index 5fe1d96..5f04b1d 100644 --- a/spip2md/content.py +++ b/spip2md/content.py @@ -1,36 +1,109 @@ import re from os import path -# from lark import Lark -# from pyparsing import Word, alphas - -# larkParser = Lark(open(path.dirname(__file__) + "/spip.lark")) - class content: - _mappings = ( - (re.compile(r"\{\{\{(.*?)\}\}\}", re.S | re.I), r"## \1"), - (re.compile(r"\{\{ \{(.*?)\} \}\}", re.S | re.I), r"***\1***"), - (re.compile(r"\{ \{\{(.*?)\}\} \}", re.S | re.I), r"***\1***"), - (re.compile(r"\{\{(.*?)\}\}", re.S | re.I), r"**\1**"), - (re.compile(r"\{(.*?)\}", re.S | re.I), r"*\1*"), - ) + _mappings = { + "horizontal-rule": ( + re.compile(r"- ?- ?- ?- ?[\- ]*|
", re.S | re.I), + r"---", + ), + "line-break": ( + re.compile(r"\r?\n_ *(?=\r?\n)|
", re.S | re.I), + "\n", + ), + "heading": ( + re.compile(r"\{\{\{ *(.*?) *\}\}\}", re.S | re.I), + r"## \1", + ), + "strong": ( + re.compile(r"\{\{ *(.*?) *\}\}", re.S | re.I), + r"**\1**", + ), + "emphasis": ( + re.compile(r"\{ *(.*?) *\}", re.S | re.I), + r"*\1*", + ), + "strikethrough": ( + re.compile( + r"\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)", + re.S | re.I, + ), + r"~\1~", + ), + "anchor": ( + re.compile(r"\[ *(.*?) *-> *(.*?) *\]", re.S | re.I), + r"[\1](\2)", + ), + "image": ( + re.compile(r"<(?:img|image)(.*?)(\|.*?)*>", re.S | re.I), + r"![image](\1)", + ), + "document-anchors": ( + re.compile(r"<(?:doc|emb)(.*?)(\|.*?)*>", re.S | re.I), + r"[document](\1)", + ), + "wikilink": ( + re.compile(r"\[\? *(.*?) *\]", re.S | re.I), + r"[\1](https://wikipedia.org/wiki/\1)", + ), + "footnote": ( + re.compile(r"\[\[ *(.*?) *\]\]", re.S | re.I), + r"", + ), + "unordered-list": ( + re.compile(r"(\r?\n)-(?!#|-)\*? *", re.S | re.I), + r"\1- ", + ), + "wrong-unordered-list": ( + re.compile(r"(\r?\n)\* +", re.S | re.I), + r"\1- ", + ), + "ordered-list": ( + re.compile(r"(\r?\n)-# *", re.S | re.I), + r"\g<1>1. ", + ), + "table-metadata": ( + re.compile(r"(\r?\n)\|\|(.*?)\|(.*?)\|\|", re.S | re.I), + r"", + ), + "quote": ( + re.compile( + r"<(?:quote|poesie)>\s*(.*?)\s*(?:(\r?\n){2,}|<\/(?:quote|poesie)>)", + re.S | re.I, + ), + r"> \1\2\2", + ), + "box": ( + re.compile( + r"\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/code>)", + re.S | re.I, + ), + "`\\1`", + ), + "fence": ( + re.compile( + r"\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/cadre>)", + re.S | re.I, + ), + "```\n\\1\n\n```", + ), + "multi-language": ( # Keep only the first language + re.compile( + r"\s*\[.{2,4}\]\s*(.*?)\s*(?:\s*\[.{2,4}\].*)*<\/multi>", + re.S | re.I, + ), + r"\1", + ), + } - def __init__(self, content): - self.spip = content + def __init__(self, spip): + self.markup = spip def get_markdown(self): - markdown = self.spip - for spip, md in self._mappings: - markdown = spip.sub(md, markdown) - return markdown - # Parses the body & display parse tree - try: - print(f" parse tree :\n") - print(larkParser.parse(self.spip).pretty()) - except Exception as e: - print(" PARSING FAILED :\n", e) - return markdown + for spip, markdown in self._mappings.values(): + self.markup = spip.sub(markdown, self.markup) + return self.markup # Parses a file & display its parse tree @@ -40,14 +113,12 @@ def test(filename): print(f"--- Conversion of {filename} ---\n\n") c = content(raw) print(c.get_markdown()) - # print(f"--- Parse tree of {filename} ---\n\n") - # print(larkParser.parse(raw)) if __name__ == "__main__": # Test test("../test/0.spip") - # test("../test/1.spip") - # test("../test/2.spip") - # test("../test/3.spip") - # test("../test/4.spip") + test("../test/1.spip") + test("../test/2.spip") + test("../test/3.spip") + test("../test/4.spip")