created precise terminals with preventive lookaheads, named every terminal
This commit is contained in:
parent
f59782ad77
commit
62558d2713
@ -1,58 +1,93 @@
|
|||||||
// SPIP Markup grammar for Lark
|
// SPIP Markup grammar for Lark parser
|
||||||
|
|
||||||
start: _N* block ( _N+ block )+ _N*
|
// Entry point: one or more blocks separated by one or more line breaks, with
// optional leading and trailing line breaks. The separator group uses `*`
// (not `+`) so that a document made of a single block still parses.
start: _N* block ( _N+ block )* _N*
|
||||||
|
|
||||||
?block: SEPARATOR -> hr
|
?block: paragraph
|
||||||
| unordered_list
|
|
||||||
| ordered_list
|
|
||||||
| table
|
|
||||||
| paragraph
|
|
||||||
| heading
|
| heading
|
||||||
|
| list
|
||||||
|
| table
|
||||||
|
| SEPARATOR -> hr
|
||||||
|
|
||||||
unordered_list: ( "-*" list_element _N )+ -> ul
|
list: unordered_list
|
||||||
ordered_list: ( "-#" list_element _N )+ -> ol
|
| ordered_list
|
||||||
|
|
||||||
|
unordered_list: ( _HYPHEN_STAR list_element _N )+ -> ul
|
||||||
|
ordered_list: ( _HYPHEN_HASH list_element _N )+ -> ol
|
||||||
list_element: _inline_format -> li
|
list_element: _inline_format -> li
|
||||||
|
|
||||||
table: ( row _N )+ -> table
|
table: ( row _N )+ -> table
|
||||||
row: ( "|" cell )+ "|" -> tr
|
row: ( _PIPE cell )+ _PIPE -> tr
|
||||||
cell: _inline_format -> td
|
cell: _inline_format -> td
|
||||||
|
|
||||||
heading: "{{{" ( link | nested_italic | nested_bold | TEXT ) "}}}" -> h2
|
heading: _O_CURLY_3 ( INNER_TEXT | link | nested_italic | nested_bold ) _C_CURLY_3 -> h2
|
||||||
|
|
||||||
paragraph: ( _inline_format _N? )+ -> p
|
paragraph: ( _inline_format _N? )+ -> p
|
||||||
|
|
||||||
_inline_format: bold
|
_inline_format: TEXT
|
||||||
| italic
|
| italic
|
||||||
|
| bold
|
||||||
| link
|
| link
|
||||||
| TEXT
|
|
||||||
|
|
||||||
bold: "{{" ( link | nested_italic | TEXT )+ "}}" -> strong
|
bold: _O_CURLY_2 ( INNER_TEXT | link | nested_italic )+ _C_CURLY_2 -> strong
|
||||||
italic: "{" ( link | nested_bold | TEXT )+ "}" -> em
|
italic: _O_CURLY ( INNER_TEXT | link | nested_bold )+ _C_CURLY -> em
|
||||||
nested_bold: TEXT _NOT_LEFT_BRACE "{{" ( link | TEXT )+ "}}" -> strong
|
// Bold span preceded by at least one TEXT token; its content is limited to
// INNER_TEXT and links (no italic), and the node is aliased to `strong`.
// NOTE(review): the leading TEXT tokens are named terminals, so they are kept
// as children of the `strong` node — confirm this is intended.
nested_bold: TEXT+ _O_CURLY_2 ( INNER_TEXT | link )+ _C_CURLY_2 -> strong
|
||||||
nested_italic: TEXT _NOT_LEFT_BRACE "{" ( link | TEXT )+ "}" -> em
|
// Italic span preceded by at least one TEXT token; its content is limited to
// INNER_TEXT and links (no bold), and the node is aliased to `em`.
// NOTE(review): the leading TEXT tokens are named terminals, so they are kept
// as children of the `em` node — confirm this is intended.
nested_italic: TEXT+ _O_CURLY ( INNER_TEXT | link )+ _C_CURLY -> em
|
||||||
|
|
||||||
?link: a
|
?link: footnote
|
||||||
| footnote
|
|
||||||
| wikipedia_link
|
| wikipedia_link
|
||||||
|
| a
|
||||||
|
|
||||||
a: "[" TEXT "->" link_destination "]" -> a
|
footnote: _O_SQUARE_2 HREF _C_SQUARE_2 -> footnote
|
||||||
link_destination: TEXT -> href
|
wikipedia_link: _O_SQUARE_INTERO HREF _C_SQUARE -> a_wikipedia
|
||||||
|
// Hyperlink: link opening, link text, arrow, target (HREF), closing square
// bracket. Only LINK_TEXT and HREF are kept in the tree; the underscore-prefixed
// terminals are filtered out.
a: _LINK_OPENING LINK_TEXT _ARROW HREF _C_SQUARE -> a
|
||||||
footnote: "[[" TEXT "]]" -> footnote
|
|
||||||
wikipedia_link: "[?" TEXT "]" -> a_wikipedia
|
|
||||||
|
|
||||||
// Negative terminals
|
|
||||||
|
|
||||||
_NOT_LEFT_BRACE: /[^\{]/
|
|
||||||
|
|
||||||
// Terminals
|
// Terminals
|
||||||
|
/// Windows or Unix line breaks
|
||||||
|
_N: /\r?\n/
|
||||||
|
|
||||||
|
/// Blocks
|
||||||
|
_HYPHEN_STAR: "-*"
|
||||||
|
_HYPHEN_HASH: "-#"
|
||||||
|
_PIPE: "|"
|
||||||
|
|
||||||
|
/// Markup
|
||||||
|
_O_CURLY_3: "{{{"
|
||||||
|
_C_CURLY_3: "}}}"
|
||||||
|
_O_CURLY_2: "{{"
|
||||||
|
_C_CURLY_2: "}}"
|
||||||
|
_O_CURLY: "{"
|
||||||
|
_C_CURLY: "}"
|
||||||
|
|
||||||
|
/// Links
|
||||||
|
_O_SQUARE_2: "[["
|
||||||
|
_C_SQUARE_2: "]]"
|
||||||
|
_O_SQUARE_INTERO: "[?"
|
||||||
|
_C_SQUARE: "]"
|
||||||
|
_ARROW: "->"
|
||||||
|
/// Opening square bracket followed by text and a hyphen angle bracket arrow
|
||||||
|
_LINK_OPENING: /\[(?=[^\r\n\[\]]*->)/
|
||||||
|
|
||||||
|
/// Content
|
||||||
SEPARATOR: "----" "-"*
|
SEPARATOR: "----" "-"*
|
||||||
|
|
||||||
// Windows or Unix line breaks
|
/// Text
|
||||||
_N: /\r/? /\n/
|
// - Doesn't contain line breaks
|
||||||
|
// - Doesn't contain any opening markup elements…
|
||||||
|
// - EXCEPT when they are used as regular text
|
||||||
|
TEXT: /[^\r\n\{\[\<]+/
|
||||||
|
| /\[(?!.*->.*\])/
|
||||||
|
|
||||||
// Pure text :
|
/// Inner text :
|
||||||
// - Never contains line breaks
|
// - Doesn't contain line breaks
|
||||||
// - Is the least priority element, so should be lazily matched
|
// - Doesn't contain markup closing right curly braces
|
||||||
TEXT: /.+?/
|
INNER_TEXT: /[^\r\n\}]+/
|
||||||
|
|
||||||
|
/// Link href :
|
||||||
|
// - Doesn't contain line breaks
|
||||||
|
// - Doesn't contain markup closing right square brackets
|
||||||
|
HREF: /[^\r\n\]]+/
|
||||||
|
|
||||||
|
/// Link text :
|
||||||
|
// - Doesn't contain line breaks
|
||||||
|
// - Doesn't contain a hyphen angle bracket arrow ( -> )
|
||||||
|
// Lazy quantifier: stop at the FIRST arrow so the link text itself never
// contains `->`; everything after that arrow is left to _ARROW and HREF.
LINK_TEXT: /[^\r\n\[\]]+?(?=->)/
|
||||||
|
Loading…
Reference in New Issue
Block a user