use lazy repetition for text, simplification of grammar

This commit is contained in:
Guilhem Fauré 2023-04-27 11:39:59 +02:00
parent d9341177ba
commit 42276b5b5c

View File

@ -1,15 +1,13 @@
start: _N? block ( _N+ block )+ _N* // SPIP Markup grammar for Lark
?block: heading start: _N* block ( _N+ block )+ _N*
| SEPARATOR -> hr
?block: SEPARATOR -> hr
| unordered_list | unordered_list
| ordered_list | ordered_list
| table | table
| paragraph | paragraph
| heading
heading: "{{{" ( TEXT | link | nested_italic | nested_bold ) "}}}" -> h2
SEPARATOR.9: "----" "-"*
unordered_list: ( "-*" list_element _N )+ -> ul unordered_list: ( "-*" list_element _N )+ -> ul
ordered_list: ( "-#" list_element _N )+ -> ol ordered_list: ( "-#" list_element _N )+ -> ol
@ -19,38 +17,42 @@ table: ( row _N )+ -> table
row: ( "|" cell )+ "|" -> tr row: ( "|" cell )+ "|" -> tr
cell: _inline_format -> td cell: _inline_format -> td
paragraph: ( _inline_format _N? )+ -> p heading: "{{{" ( link | nested_italic | nested_bold | TEXT ) "}}}" -> h2
// Windows or Unix line break paragraph: ( _inline_format _N? )+ -> p
_N: /\r/? /\n/
_inline_format: bold _inline_format: bold
| italic | italic
| link | link
| TEXT | TEXT
bold: "{{" ( TEXT | link | nested_italic )+ "}}" -> strong bold: "{{" ( link | nested_italic | TEXT )+ "}}" -> strong
italic: "{" ( TEXT | link | nested_bold )+ "}" -> em italic: "{" ( link | nested_bold | TEXT )+ "}" -> em
nested_bold: TEXT _NOT_LEFT_BRACE "{{" ( link | TEXT )+ "}}" -> strong
nested_bold: _NOT_LBRACE "{{" ( TEXT | link ) "}}" _NOT_RBRACE -> strong nested_italic: TEXT _NOT_LEFT_BRACE "{" ( link | TEXT )+ "}" -> em
nested_italic: _NOT_LBRACE "{" ( TEXT | link ) "}" _NOT_RBRACE -> em
_NOT_LBRACE: /[^\{]/
_NOT_RBRACE: /[^\}]/
?link: a ?link: a
| footnote | footnote
| wikipedia_link | wikipedia_link
a: "[" link_text "->" link_destination "]" -> a a: "[" TEXT "->" link_destination "]" -> a
link_text: TEXT -> text
link_destination: TEXT -> href link_destination: TEXT -> href
footnote: "[[" footnote_content "]]" -> footnote
footnote_content: TEXT -> content footnote: "[[" TEXT "]]" -> footnote
wikipedia_link: "[?" wikipedia_query "]" -> a_wikipedia wikipedia_link: "[?" TEXT "]" -> a_wikipedia
wikipedia_query: TEXT -> query
// Negative terminals
_NOT_LEFT_BRACE: /[^\{]/
// Terminals
SEPARATOR: "----" "-"*
// Windows or Unix line breaks
_N: /\r/? /\n/
// Pure text : // Pure text :
// - Never contains line breaks // - Never contains line breaks
// - Never contains curly braces // - Is the least priority element, so should be lazily matched
TEXT.0: /[^\r\n\{\}]/+ TEXT: /.+?/