spip2md/spip2md/spip.lark

108 lines
3.3 KiB
Plaintext
Raw Normal View History

2023-04-28 15:44:50 +02:00
// Flexible SPIP markup grammar for the Lark parser
// Entry rule: optional leading newlines, at least one block, further blocks
// each preceded by one or more newlines, then optional trailing newlines.
start: _N* block ( _N+ block )* _N*
2023-05-03 16:06:23 +02:00
// A block is a horizontal rule, a heading, a list, a table, a block-level tag,
// or a paragraph. The paragraph template is instantiated so that its first
// text chunk uses PARAGRAPH_TEXT and every following chunk uses TEXT.
// The leading "?" makes Lark inline this rule when it has a single child.
?block: HORIZONTAL_RULE
| heading
| list
| table
2023-05-02 10:56:28 +02:00
| _block_tag
| paragraph{PARAGRAPH_TEXT, TEXT}
2023-04-28 15:44:50 +02:00
// Four dashes, each optionally followed by spaces, then any further run of
// dashes and spaces (e.g. "----" or "- - - -").
HORIZONTAL_RULE: /- *- *- *- *[\- ]*/
2023-04-26 11:13:47 +02:00
2023-04-27 17:33:39 +02:00
// A list is either unordered or ordered ("?" inlines this single-child rule).
// Each list is one or more items introduced by its marker (_UL or _OL);
// items are separated by a newline and the newline after the last item is
// optional.
?list: unordered_list
| ordered_list
unordered_list: ( _UL list_item _N )* _UL list_item _N?
ordered_list: ( _OL list_item _N )* _OL list_item _N?
2023-04-28 15:44:50 +02:00
// A list item is one or more inline elements whose plain text is TEXT.
list_item: _inline{TEXT}+
// Unordered marker: "-*", or a "-" that is not followed by "#" (the ordered
// marker) or by "---" (presumably to avoid clashing with HORIZONTAL_RULE;
// TODO confirm).
_UL: /-\*|-(?!#|---)/
2023-04-28 15:44:50 +02:00
// Ordered list marker: the literal "-#".
_OL: /-#/
2023-05-09 09:29:10 +02:00
// A table: an optional metadata line (opened by _TBL_META, closed by "||" and
// a newline) followed by one or more rows, each terminated by a newline.
table: ( _TBL_META table_metadata "||" _N )? ( table_row _N )+
2023-04-28 15:44:50 +02:00
// Metadata line content: a title and a description separated by "|".
table_metadata: table_title "|" table_description
table_title: _inline{TABLE_TEXT}
table_description: _inline{TABLE_TEXT}
// A row is one or more cells, each introduced by _TBL, closed by a final "|".
table_row: ( _TBL table_cell )+ "|"
table_cell: _inline{TABLE_TEXT}
// "||" accepted only when, on the same line, at least one character and then
// another "||" follow (lookahead, nothing beyond the "||" is consumed).
_TBL_META: /\|\|(?=[^\r\n]+\|\|)/
// "|" accepted only when, on the same line, at least one character and then
// another "|" follow.
_TBL: /\|(?=[^\r\n]+\|)/
2023-04-28 15:44:50 +02:00
2023-05-09 15:52:18 +02:00
// Heading: "{{{" ... "}}}" wrapping a paragraph whose text chunks are all
// MARKED_TEXT (which excludes braces and newlines).
heading: _H paragraph{MARKED_TEXT, MARKED_TEXT} "}}}"
2023-04-28 15:44:50 +02:00
// Heading opener.
_H: "{{{"
2023-05-09 09:29:10 +02:00
// Block-level tag: either an opening/closing pair or a lone (orphan) tag.
_block_tag: pair_block_tag
| orphan_block_tag
2023-05-02 10:56:28 +02:00
2023-05-09 15:52:18 +02:00
// Paired block tag: "<name opt...>" + optional nested document (`start`) +
// "</...>". Options are each preceded by _OPTION_SEP. The closing name is
// matched here as any _PURE_TEXT; that it equals the opening name is only
// checked by the lookahead inside _PAIR_TAG_ANGLE. Aliased to `tag`.
pair_block_tag: _PAIR_TAG_ANGLE TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" start? "</" _PURE_TEXT ">" -> tag
2023-05-03 17:00:15 +02:00
// Lone block tag with no matching closing tag: "<name opt...>", aliased to
// `orphan_tag`.
// NOTE(review): the lookahead in _ORPHAN_TAG_ANGLE requires an alphanumeric
// character right after "<", so the optional "/" below looks unable to ever
// match (a stray "</name>" would not be accepted here) - confirm intent.
orphan_block_tag: _ORPHAN_TAG_ANGLE "/"? TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" -> orphan_tag
2023-05-02 10:56:28 +02:00
2023-05-09 15:52:18 +02:00
// "<" of a paired block tag. Only the "<" is consumed; the lookahead requires
// an alphanumeric name, then characters other than {}<>[] up to ">", and a
// later "</name>" with the same name (backreference \1). Case-insensitive.
_PAIR_TAG_ANGLE: /<(?=([0-9a-z]+)[^{}<>\[\]]*>[\s\S]*<\/\1>)/i
// "<" of an orphan block tag: same opening-tag lookahead, but rejected when a
// "</name>" for that name occurs anywhere in the remaining input.
_ORPHAN_TAG_ANGLE: /<(?=([0-9a-z]+)[^{}<>\[\]]*>)(?![\s\S]*\<\/\1\>)/i
2023-05-02 10:56:28 +02:00
2023-05-09 16:31:34 +02:00
// Paragraph template. `begin` is the text terminal allowed for the first
// inline element, `text` the one used afterwards. After the first element,
// any number of inline elements or inline tags may follow; each element may
// be followed by a single optional newline.
paragraph{begin, text}: _inline{begin} _N? ( ( _inline{text} | _inline_tag ) _N? )*
2023-04-28 15:44:50 +02:00
2023-05-09 16:31:34 +02:00
// Inline element template: a link, emphasis, strong, or a chunk of plain text
// matched by the terminal passed as `text`.
_inline{text}: _link
2023-04-28 15:44:50 +02:00
| emphasis
| strong
2023-05-02 10:56:28 +02:00
| text
2023-04-28 15:44:50 +02:00
2023-05-09 09:29:10 +02:00
// Inline tag: a paired tag, a "short" tag (opening tag with no closing tag,
// followed by content), or an orphan tag.
_inline_tag: pair_inline_tag
| short_inline_tag
2023-05-09 10:15:53 +02:00
| orphan_inline_tag
2023-04-28 15:44:50 +02:00
// Paired inline tag: "<name opt...>", optional newline, a paragraph of TEXT,
// then "</...>" (closing name matched as any _PURE_TEXT). Aliased to `tag`.
pair_inline_tag: _INLINE_TAG_ANGLE TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" _N? paragraph{TEXT, TEXT} "</" _PURE_TEXT ">" -> tag
// Short inline tag: same opening tag and content, but no closing tag is
// consumed. Also aliased to `tag`.
short_inline_tag: _SHORT_INLINE_TAG_ANGLE TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" _N? paragraph{TEXT, TEXT} -> tag
// Orphan inline tag: opening tag only, no content. Aliased to `orphan_tag`.
orphan_inline_tag: _ORPHAN_INLINE_TAG_ANGLE TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" -> orphan_tag
2023-05-02 10:56:28 +02:00
2023-05-09 15:52:18 +02:00
// All three terminals consume only "<"; the rest is lookahead, and matching
// is case-insensitive.
// Paired: name, option characters from a fixed set, ">", at least one
// character of content, then "</name>" with the same name.
_INLINE_TAG_ANGLE: /<(?=([0-9a-z]+)[0-9a-z_:|,="\/\-\. ]*>[\s\S]+<\/\1>)/i
// Short: a well-formed opening tag, rejected when the text after "<" starts
// with "/", "img", "emb" or "doc", or when a "</name>" follows later.
_SHORT_INLINE_TAG_ANGLE: /<(?=([0-9a-z]+)[0-9a-z_:,="\/\.\|\- ]*>)(?!\/|img|emb|doc|[\s\S]+<\/\1>)/i
// Orphan: only tags whose name starts with "img", "emb" or "doc".
_ORPHAN_INLINE_TAG_ANGLE: /<(?=(img|emb|doc)[0-9a-z\|_:,="\/\.\|\- ]*>)/i
2023-04-28 15:44:50 +02:00
2023-05-02 10:56:28 +02:00
// Link-like inline elements: footnote, wikilink or anchor.
_link: footnote
| wikilink
| anchor
2023-04-28 15:44:50 +02:00
2023-05-03 16:32:36 +02:00
// Footnote: "[[" content "]]".
footnote: _FOOT FOOTNOTE_CONTENT "]]"
2023-05-02 10:56:28 +02:00
// Wikilink: "[?" target "]".
wikilink: _WIKI HREF "]"
2023-05-03 17:00:15 +02:00
// Anchor: "[" link text "->" target "]"; spaces are allowed on either side
// of the target.
anchor: _A A_TEXT "->" " "* HREF " "* "]"
2023-04-28 15:44:50 +02:00
// Footnote opener "[[".
_FOOT: /\[\[/
// Wikilink opener "[?".
_WIKI: /\[\?/
// Anchor opener: "[" accepted only when "->" appears later on the same line
// with at least one character, and no other "[", in between.
_A: /\[(?=[^\[\n\r]+->)/
2023-05-03 16:32:36 +02:00
// Footnote body: one or more of letters, digits, "_", ":", "/", "-", "." and
// space.
FOOTNOTE_CONTENT: /[0-9A-Za-z_:\/\-\.\ ]+/
2023-05-03 17:00:15 +02:00
// Link target: either plain identifier-like text or a run of "*".
HREF: _PURE_TEXT | _PLACEHOLDER
2023-04-28 15:44:50 +02:00
// Anchor text: shortest non-empty run of characters (no newline, no "{")
// that is immediately followed by "->".
A_TEXT: /[^\r\n\{]+?(?=->)/
// Strong: "{{" + one or more inline elements of MARKED_TEXT, closed by "}}"
// or by a newline.
strong: _STRONG ( _inline{MARKED_TEXT} )+ ( "}}" | _N )
// Emphasis: "{" + one or more inline elements of MARKED_TEXT, closed by "}"
// or by a newline.
emphasis: _EM ( _inline{MARKED_TEXT} )+ ( "}" | _N )
// Openers: accepted only when the next character is not another "{".
_STRONG: /{{(?=[^\{])/
_EM: /{(?=[^\{])/
2023-04-28 15:44:50 +02:00
// Text allowed at the start of a paragraph (verbose regex, flag x). Three
// alternatives:
//  1. a first character that is not a newline, "|", "-", "{" or "<", then any
//     run of characters other than newline, "{" and "<", where each of those
//     following characters must not be followed by what looks like the rest
//     of an anchor ("...->"), wikilink ("?...]") or footnote ("[...]]");
//  2. one or more "<" that do not start a "name>"-like tag;
//  3. a "|" followed by a character that is neither a newline nor "|".
PARAGRAPH_TEXT: / [^\r\n|\-{<]
(?:[^\r\n{<](?!
[^\[\r\n]*->
|\?[^\[\r\n]*\]
|\[[^\[\r\n]*\]\]
))*
| (?:\<(?![0-9A-Za-z_:|,=\/\-\. ]+\>))+
| \|(?=[^\r\n|])
/x
2023-05-09 09:29:10 +02:00
// General inline text (verbose regex, flag x). Three alternatives:
//  1. one or more characters other than newline, "{" and "<", each of which
//     must not be followed by what looks like the rest of an anchor
//     ("...->"), wikilink ("?...]") or footnote ("[...]]");
//  2. one or more "<" that do not start a "name>"-like tag;
//  3. a backslash followed by "{" (an escaped brace).
TEXT: / (?:[^\r\n{<](?!
[^\[\r\n]*->
|\?[^\[\r\n]*\]
|\[[^\[\r\n]*\]\]
2023-05-09 10:15:53 +02:00
))+
| (?:\<(?![0-9A-Za-z_:|,=\/\-\. ]+\>))+
| \\{
/x
2023-05-09 09:29:10 +02:00
// Text inside table cells: no "|", newline or "{"; each character must not be
// followed by "->" on the same line before any "[".
TABLE_TEXT: /(?:[^\|\r\n\{](?![^\[\n\r]*->))+/
// Text inside headings/strong/emphasis: no "}", newline or "{"; same "->"
// lookahead restriction as TABLE_TEXT.
MARKED_TEXT: /(?:[^\}\r\n\{](?![^\[\n\r]*->))+/
2023-04-28 15:44:50 +02:00
// Tag names and tag options share the _PURE_TEXT character set.
TAG_NAME: _PURE_TEXT
TAG_OPTION: _PURE_TEXT
2023-05-03 17:00:15 +02:00
// Separator between a tag name and its options: a "|" optionally surrounded
// by spaces, or one or more spaces.
_OPTION_SEP: " "* "|" " "* | " "+
// Line break: LF, optionally preceded by CR.
_N: /\r?\n/
2023-05-09 14:57:55 +02:00
// Identifier-like text: one or more of letters, digits and the characters
// _ : , = ? " / - .
_PURE_TEXT: /[0-9A-Za-z_:,=?"\/\-\.]+/
2023-05-09 13:11:30 +02:00
// One or more "*" characters, accepted as a link target by HREF.
_PLACEHOLDER: /\*+/