Separate paragraph and text; pure_text is more precise but more rigid than the reference implementation

This commit is contained in:
Guilhem Fauré 2023-04-25 14:46:41 +02:00
parent 2132fdfd68
commit bb4c8ccb23

View File

@@ -3,41 +3,38 @@ from lark import Lark
spipParser = Lark( spipParser = Lark(
r""" r"""
section: /\n\r?/ section: /\n\r?/
( heading ( paragraph
| heading
| list | list
| table | table
| quote | quote
| paragraph
| SEPARATOR | SEPARATOR
) /\n\r?/ ) /\n\r?/
heading: "{{{" paragraph "}}}" paragraph: format_text
heading: "{{{" format_text "}}}"
list: unordered_list list: unordered_list
| unordered_sublist | unordered_sublist
| ordered_list | ordered_list
| ordered_sublist | ordered_sublist
unordered_list: (/\n\r?-* / paragraph)+ unordered_list: (/\n\r?-* / format_text)+
unordered_sublist: (/\n\r?-*{2,7} / paragraph)+ unordered_sublist: (/\n\r?-*{2,7} / format_text)+
ordered_list: (\/n/r?-# / paragraph)+ ordered_list: (\/n/r?-# / format_text)+
ordered_sublist: (\/n/r?-#{2,7} / paragraph)+ ordered_sublist: (\/n/r?-#{2,7} / format_text)+
table: row+ table: row+
row: /\n\r?\|/ cell+ row: /\n\r?\|/ cell+
cell: paragraph "|" cell: format_text "|"
quote: "<quote>" paragraph "</quote>" quote: "<quote>" format_text "</quote>"
paragraph: text+ format_text: (
| link
text: format_text | PURE_TEXT
| link )+
| PURE_TEXT
format_text: italic
| bold
| bold_italic
italic: "{" PURE_TEXT "}" italic: "{" PURE_TEXT "}"
bold: "{{" PURE_TEXT "}}" bold: "{{" PURE_TEXT "}}"
@@ -53,7 +50,7 @@ spipParser = Lark(
footnote: "[[" PURE_TEXT "]]" footnote: "[[" PURE_TEXT "]]"
footnote: "[?" PURE_TEXT "]" footnote: "[?" PURE_TEXT "]"
PURE_TEXT: /[^\n\r]+/ PURE_TEXT: /[^\s\{\-\|\<\[\}\>\]][^\n\r\{\<\[\}\]]*/
SEPARATOR: /-{4,}/ SEPARATOR: /-{4,}/
""", """,