From bb4c8ccb23cba60e241ba256b8364e5121210581 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilhem=20Faur=C3=A9?= Date: Tue, 25 Apr 2023 14:46:41 +0200 Subject: [PATCH] separate paragraph & text, more precise pure_text but more rigid than reference implementation --- spip2md/content.py | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/spip2md/content.py b/spip2md/content.py index d8877dc..dee683e 100644 --- a/spip2md/content.py +++ b/spip2md/content.py @@ -3,41 +3,38 @@ from lark import Lark spipParser = Lark( r""" section: /\n\r?/ - ( heading + ( paragraph + | heading | list | table | quote - | paragraph | SEPARATOR ) /\n\r?/ - heading: "{{{" paragraph "}}}" + paragraph: format_text + + heading: "{{{" format_text "}}}" list: unordered_list | unordered_sublist | ordered_list | ordered_sublist - unordered_list: (/\n\r?-* / paragraph)+ - unordered_sublist: (/\n\r?-*{2,7} / paragraph)+ - ordered_list: (\/n/r?-# / paragraph)+ - ordered_sublist: (\/n/r?-#{2,7} / paragraph)+ + unordered_list: (/\n\r?-* / format_text)+ + unordered_sublist: (/\n\r?-*{2,7} / format_text)+ + ordered_list: (\/n/r?-# / format_text)+ + ordered_sublist: (\/n/r?-#{2,7} / format_text)+ table: row+ row: /\n\r?\|/ cell+ - cell: paragraph "|" + cell: format_text "|" - quote: "" paragraph "" + quote: "" format_text "" - paragraph: text+ - - text: format_text - | link - | PURE_TEXT - - format_text: italic - | bold - | bold_italic + format_text: ( + | link + | PURE_TEXT + )+ italic: "{" PURE_TEXT "}" bold: "{{" PURE_TEXT "}}" @@ -53,7 +50,7 @@ spipParser = Lark( footnote: "[[" PURE_TEXT "]]" footnote: "[?" PURE_TEXT "]" - PURE_TEXT: /[^\n\r]+/ + PURE_TEXT: /[^\s\{\-\|\<\[\}\>\]][^\n\r\{\<\[\}\]]*/ SEPARATOR: /-{4,}/ """,