// spip2md/spip2md/spip.lark
// 2023-05-09 16:31:34 +02:00
//
// 108 lines
// 3.3 KiB
// Plaintext

// Flexible SPIP Markup grammar for Lark parser
// Entry point: one or more blocks, consecutive blocks being separated by at
// least one newline; any number of newlines may precede the first block and
// follow the last one.
start: _N* block ( _N+ block )* _N*
// A block is exactly one of the alternatives listed below.
// The leading "?" is Lark's inlining marker: when the rule ends up with a
// single child, that child replaces the rule's node in the tree.
// The paragraph template is instantiated so that its first inline run is
// matched with PARAGRAPH_TEXT and the following runs with TEXT.
?block: HORIZONTAL_RULE
| heading
| list
| table
| _block_tag
| paragraph{PARAGRAPH_TEXT, TEXT}
// At least four dashes, each optionally followed by spaces, then any further
// mix of dashes and spaces.
HORIZONTAL_RULE: /- *- *- *- *[\- ]*/
// A list is either unordered or ordered ("?" inlines the single child).
?list: unordered_list
| ordered_list
// One or more items, each introduced by the _UL marker. Items are separated
// by a newline; the newline after the last item is optional.
unordered_list: ( _UL list_item _N )* _UL list_item _N?
// Same shape as unordered_list, with the _OL marker instead.
ordered_list: ( _OL list_item _N )* _OL list_item _N?
// An item is one or more inline elements whose plain text is matched by TEXT.
list_item: _inline{TEXT}+
// Unordered marker: "-*", or a "-" that is not followed by "#" or "---".
// NOTE(review): the two exclusions presumably keep this terminal from
// clashing with _OL ("-#") and HORIZONTAL_RULE ("----") - confirm.
_UL: /-\*|-(?!#|---)/
// Ordered marker: "-#".
_OL: /-#/
// A table is an optional metadata line ("||" title "|" description "||"
// followed by a newline) and then one or more rows, each followed by a newline.
// NOTE(review): unlike the list rules, the newline after the last row is
// mandatory here - confirm this is intended for a table ending the input.
table: ( _TBL_META table_metadata "||" _N )? ( table_row _N )+
// Metadata content: a title and a description separated by a single "|".
table_metadata: table_title "|" table_description
// Title and description are each exactly one inline element using TABLE_TEXT.
table_title: _inline{TABLE_TEXT}
table_description: _inline{TABLE_TEXT}
// A row is one or more (_TBL cell) pairs, closed by a final "|".
table_row: ( _TBL table_cell )+ "|"
// A cell is exactly one inline element using TABLE_TEXT.
table_cell: _inline{TABLE_TEXT}
// "||" accepted only when, after at least one more character on the same
// line, another "||" occurs (lookahead; the rest of the line is not consumed).
_TBL_META: /\|\|(?=[^\r\n]+\|\|)/
// "|" accepted only when, after at least one more character on the same
// line, another "|" occurs (lookahead; the rest of the line is not consumed).
_TBL: /\|(?=[^\r\n]+\|)/
// Heading: a paragraph enclosed between "{{{" and "}}}". The paragraph
// template is instantiated with MARKED_TEXT for both its first and its
// following inline runs.
heading: _H paragraph{MARKED_TEXT, MARKED_TEXT} "}}}"
// Heading opener.
_H: "{{{"
// Block-level tag: either a paired tag (with a closing tag) or an orphan tag.
_block_tag: pair_block_tag
| orphan_block_tag
// Paired tag: "<" name, zero or more separator+option pairs, ">", an optional
// nested document (the `start` rule), then "</" closing-name ">".
// The closing name is matched only as _PURE_TEXT by this rule; the lookahead
// in _PAIR_TAG_ANGLE merely checks that some closing tag bearing the opening
// name occurs later in the input. The node is emitted under the alias "tag".
pair_block_tag: _PAIR_TAG_ANGLE TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" start? "</" _PURE_TEXT ">" -> tag
// Orphan tag: "<", an optional "/", name, zero or more separator+option
// pairs, ">". The node is emitted under the alias "orphan_tag".
// NOTE(review): _ORPHAN_TAG_ANGLE requires [0-9a-z] immediately after "<",
// so the optional "/" looks unreachable - confirm.
orphan_block_tag: _ORPHAN_TAG_ANGLE "/"? TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" -> orphan_tag
// "<" accepted only when followed by an alphanumeric name, then any characters
// other than { } < > [ ] up to ">", and later (possibly across lines) a
// closing tag "</name>" with the same name. Case-insensitive.
// NOTE(review): \1 is a numbered back-reference to the name group; this
// presumably relies on the terminal being compiled as a regex of its own -
// confirm for the lexer in use.
_PAIR_TAG_ANGLE: /<(?=([0-9a-z]+)[^{}<>\[\]]*>[\s\S]*<\/\1>)/i
// "<" accepted only when followed by the same kind of opening tag AND no
// closing tag "</name>" with that name occurs anywhere after the "<".
// Case-insensitive.
_ORPHAN_TAG_ANGLE: /<(?=([0-9a-z]+)[^{}<>\[\]]*>)(?![\s\S]*\<\/\1\>)/i
// Template rule. A paragraph starts with one inline element whose plain text
// is matched by the `begin` terminal; every following element is either an
// inline element using the `text` terminal or an inline tag. Each element may
// be followed by at most one newline.
paragraph{begin, text}: _inline{begin} _N? ( ( _inline{text} | _inline_tag ) _N? )*
// Template rule. An inline element is a link, an emphasis, a strong, or a
// plain-text token of the terminal passed as `text`.
_inline{text}: _link
| emphasis
| strong
| text
// Inline tag: paired, short (opening tag without closing tag), or orphan.
_inline_tag: pair_inline_tag
| short_inline_tag
| orphan_inline_tag
// Paired inline tag: "<" name, zero or more separator+option pairs, ">", an
// optional newline, a paragraph (TEXT for every run), then "</" closing-name
// ">". The closing name is matched only as _PURE_TEXT by this rule.
// The node is emitted under the alias "tag".
pair_inline_tag: _INLINE_TAG_ANGLE TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" _N? paragraph{TEXT, TEXT} "</" _PURE_TEXT ">" -> tag
// Short inline tag: same opening part followed by a paragraph, with no
// closing tag. The node is emitted under the alias "tag".
short_inline_tag: _SHORT_INLINE_TAG_ANGLE TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" _N? paragraph{TEXT, TEXT} -> tag
// Orphan inline tag: the opening tag alone. The node is emitted under the
// alias "orphan_tag".
orphan_inline_tag: _ORPHAN_INLINE_TAG_ANGLE TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" -> orphan_tag
// "<" accepted only when followed by an alphanumeric name, then characters
// from a restricted option set up to ">", then at least one character and a
// closing tag "</name>" with the same name. Case-insensitive.
// NOTE(review): \1 is a numbered back-reference to the name group; this
// presumably relies on the terminal being compiled as a regex of its own -
// confirm for the lexer in use.
_INLINE_TAG_ANGLE: /<(?=([0-9a-z]+)[0-9a-z_:|,="\/\-\. ]*>[\s\S]+<\/\1>)/i
// "<" accepted only when followed by a well-formed opening tag and NOT
// directly followed by "/", "img", "emb" or "doc", nor followed later by a
// closing tag "</name>" with the same name. Case-insensitive.
_SHORT_INLINE_TAG_ANGLE: /<(?=([0-9a-z]+)[0-9a-z_:,="\/\.\|\- ]*>)(?!\/|img|emb|doc|[\s\S]+<\/\1>)/i
// "<" accepted only when followed by "img", "emb" or "doc", then characters
// from the restricted option set up to ">". Case-insensitive.
// The "\|" appears twice in the character class; the second one is redundant.
_ORPHAN_INLINE_TAG_ANGLE: /<(?=(img|emb|doc)[0-9a-z\|_:,="\/\.\|\- ]*>)/i
// A link is a footnote, a wikilink, or an anchor.
_link: footnote
| wikilink
| anchor
// Footnote: "[[" content "]]".
footnote: _FOOT FOOTNOTE_CONTENT "]]"
// Wikilink: "[?" target "]".
wikilink: _WIKI HREF "]"
// Anchor: "[" link text "->" target "]", with optional spaces on either side
// of the target.
anchor: _A A_TEXT "->" " "* HREF " "* "]"
// Footnote opener "[[".
_FOOT: /\[\[/
// Wikilink opener "[?".
_WIKI: /\[\?/
// "[" accepted only when "->" occurs later on the same line, after at least
// one character and with no other "[" in between.
_A: /\[(?=[^\[\n\r]+->)/
// One or more of: digits, ASCII letters, "_", ":", "/", "-", "." and space.
FOOTNOTE_CONTENT: /[0-9A-Za-z_:\/\-\.\ ]+/
// A link target: either _PURE_TEXT or _PLACEHOLDER (a run of asterisks).
HREF: _PURE_TEXT | _PLACEHOLDER
// Link text: the shortest non-empty run of characters, excluding newlines
// and "{", that is immediately followed by "->" (the "->" is not consumed).
A_TEXT: /[^\r\n\{]+?(?=->)/
// Strong: "{{", one or more inline elements using MARKED_TEXT, then either
// the closer "}}" or a newline (an unclosed run ends at the line break).
strong: _STRONG ( _inline{MARKED_TEXT} )+ ( "}}" | _N )
// Emphasis: "{", one or more inline elements using MARKED_TEXT, then either
// the closer "}" or a newline.
emphasis: _EM ( _inline{MARKED_TEXT} )+ ( "}" | _N )
// "{{" accepted only when the next character is not another "{".
_STRONG: /{{(?=[^\{])/
// "{" accepted only when the next character is not another "{".
_EM: /{(?=[^\{])/
// Plain text opening a paragraph (verbose regex, "x" flag). Three alternatives:
//  1. one character that is not a newline, "|", "-", "{" or "<", followed by
//     any number of characters that are not a newline, "{" or "<" and that are
//     not directly followed by something shaped like a link: a "->" later on
//     the line with no "[" before it, a "?...]" run, or a "[...]]" run;
//  2. one or more "<" that do not open something shaped like a tag
//     (tag characters, spaces included, up to ">");
//  3. a "|" followed by a character that is neither a newline nor "|".
PARAGRAPH_TEXT: / [^\r\n|\-{<]
(?:[^\r\n{<](?!
[^\[\r\n]*->
|\?[^\[\r\n]*\]
|\[[^\[\r\n]*\]\]
))*
| (?:\<(?![0-9A-Za-z_:|,=\/\-\. ]+\>))+
| \|(?=[^\r\n|])
/x
// Plain text inside a paragraph (verbose regex, "x" flag). Three alternatives:
//  1. one or more characters that are not a newline, "{" or "<" and that are
//     not directly followed by something shaped like a link (same three
//     lookahead shapes as in PARAGRAPH_TEXT);
//  2. one or more "<" that do not open something shaped like a tag;
//  3. a backslash followed by "{".
TEXT: / (?:[^\r\n{<](?!
[^\[\r\n]*->
|\?[^\[\r\n]*\]
|\[[^\[\r\n]*\]\]
))+
| (?:\<(?![0-9A-Za-z_:|,=\/\-\. ]+\>))+
| \\{
/x
// Text inside a table cell: one or more characters that are not "|", a
// newline or "{", none of them followed by "->" later on the same line
// without a "[" in between.
TABLE_TEXT: /(?:[^\|\r\n\{](?![^\[\n\r]*->))+/
// Text inside braces markup: one or more characters that are not "}", a
// newline or "{", none of them followed by "->" later on the same line
// without a "[" in between.
MARKED_TEXT: /(?:[^\}\r\n\{](?![^\[\n\r]*->))+/
// Tag name and tag option share the _PURE_TEXT character set; they are kept
// as separate named terminals.
TAG_NAME: _PURE_TEXT
TAG_OPTION: _PURE_TEXT
// Separator between a tag name and its options: a "|" with optional spaces
// on both sides, or one or more spaces.
_OPTION_SEP: " "* "|" " "* | " "+
// A single line break: LF, optionally preceded by CR.
_N: /\r?\n/
// One or more of: digits, ASCII letters, and the characters _ : , = ? " / - .
_PURE_TEXT: /[0-9A-Za-z_:,=?"\/\-\.]+/
// One or more asterisks.
_PLACEHOLDER: /\*+/