From 1e59bb68331892ffd3e96e105fb93594bfc4c59f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilhem=20Faur=C3=A9?= Date: Fri, 28 Apr 2023 15:43:43 +0200 Subject: [PATCH] new more flexible grammar --- spip2md/spip.flex.lark | 69 ++++++++++++++++++------------------------ test/0.spip | 6 ++-- 2 files changed, 33 insertions(+), 42 deletions(-) diff --git a/spip2md/spip.flex.lark b/spip2md/spip.flex.lark index 861125a..3f75835 100644 --- a/spip2md/spip.flex.lark +++ b/spip2md/spip.flex.lark @@ -1,5 +1,5 @@ // Flexible SPIP Markup grammar for Lark parser -start: _N* block ( _N+ block )+ _N* +start: _N* block ( _N+ block )* _N* ?block: paragraph | heading @@ -15,51 +15,38 @@ HORIZONTAL_RULE: /----+/ unordered_list: ( _UL list_item _N )+ ordered_list: ( _OL list_item _N )+ -list_item: _inline -_UL: /-|-\*/ +list_item: _inline{TEXT}+ +_UL: /-\*|-[^#-]/ _OL: /-#/ table: ( _TBL_META table_metadata "||" _N )? ( table_row _N )+ table_metadata: table_title "|" table_description -table_title: _table_inline -table_description: _table_inline +table_title: _inline{TABLE_TEXT} +table_description: _inline{TABLE_TEXT} table_row: ( _TBL table_cell )+ "|" -table_cell: _table_inline +table_cell: _inline{TABLE_TEXT} _TBL_META: "||" _TBL: "|" -heading: _H _markup_inline "}}}" +heading: _H _inline{MARKED_TEXT}+ "}}}" _H: "{{{" -paragraph.-1: ( _inline _N? )+ +paragraph: ( _inline{TEXT} _N? )+ -_inline: TEXT - | emphasis - | strong - | anchor - | tag - -TEXT.-1: /[^\r\n]+/ - -_table_inline: TABLE_TEXT +_inline{text}: text | emphasis | strong | anchor + | tag -TABLE_TEXT: /[^\r\n|]+(?=[\{\[\|])/ +TEXT: /(?:[^\r\n\{](?![^\[\n\r]*->))+/ +TABLE_TEXT: /[^\|\r\n\{]+/ +MARKED_TEXT: /[^\}\r\n\{]+/ -_markup_inline: MARKUP_TEXT - | emphasis - | strong - | anchor - | tag - -MARKUP_TEXT.-1: /[^\r\n\}]+/ - -strong: _B ( _markup_inline )+ "}}" -emphasis: _I ( _markup_inline )+ "}" -_B: "{{" -_I: "{" +strong: _B ( _inline{MARKED_TEXT} )+ ( "}}" | _N ) +emphasis: _I ( _inline{MARKED_TEXT} )+ ( "}" | _N ) +_B: /{{(?=[^\{])/ +_I: /{(?=[^\{])/ ?anchor: anchor_footnote | anchor_wikipedia @@ -68,19 +55,21 @@ _I: "{" anchor_footnote: _FOOT HREF "]]" anchor_wikipedia: _WIKI HREF "]" anchor_normal: _A A_TEXT "->" HREF "]" -_FOOT: "[[" -_WIKI: "[?" -_A: /\[(?=[^\n\r]+->)/ -HREF: /[^\r\n\]]+/ -A_TEXT: /[^\r\n]+(?=->)/ +_FOOT: /\[\[/ +_WIKI: /\[\?/ +_A: /\[(?=[^\[\n\r]+->)/ +HREF: _PURE_TEXT +A_TEXT: /[^\r\n\{]+?(?=->)/ tag: end_tag | start_tag -end_tag: _E_TAG TAG_TEXT ( "|" TAG_TEXT )* ">" -start_tag: _S_TAG TAG_TEXT ( "|" TAG_TEXT )* ">" -_S_TAG: "<" -_E_TAG: "]+/ +end_tag: _ETAG TAG_NAME ( "|" TAG_OPTION )* ">" +start_tag: _STAG TAG_NAME ( "|" TAG_OPTION )* ">" +_STAG: "<" +_ETAG: "https://lark-parser.readthedocs.io] dans un {paragraphe} en itali avec un peu de {{gras}}, voire même du { {{gras}} dans de l’{{italique}} (en gras)}. Conclut par un {{ {Lorem ipsum} }} dolor sit amet, officia excepteur ex fugiat reprehenderit enim labore culpa sint ad nisi Lorem pariatur mollit ex esse exercitation amet. Nisi anim cupidatat excepteur officia. -Reprehenderit nostrud nostrud ipsum Lorem est aliquip amet voluptate voluptate dolor minim nulla est proident. Nostrud officia {{pariatur}} ut officia. Sit irure elit esse ea nulla sunt ex occaecat reprehenderit commodo officia dolor Lorem duis laboris cupidatat officia voluptate. +Reprehenderit nostrud nostrud ipsum Lorem est aliquip amet voluptate voluptate dolor minim nulla est accolade impromptue } proident. Nostrud officia {{pariatur}} ut officia. Sit irure elit esse ea nulla sunt ex occaecat reprehenderit commodo officia dolor Lorem duis laboris cupidatat officia voluptate. -Culpa proident adipisicing id {nulla} nisi laboris ex in Lorem sunt duis officia eiusmod. Aliqua reprehenderit commodo ex non excepteur duis sunt velit enim. Voluptate laboris sint cupidatat ullamco ut ea consectetur et est culpa et culpa duis. +Culpa proident adipisicing id {nulla} nisi laboris ex in Lorem sunt duis officia eiusmod. Aliqua reprehenderit commodo ex non [excepteur] duis sunt velit enim. Voluptate laboris sint cupidatat ullamco ut ea consectetur et est culpa et culpa duis. ----