From 28ab3123df1fedd0e9402a5fa5e3c2e13171e5a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilhem=20Faur=C3=A9?= Date: Fri, 28 Apr 2023 15:44:50 +0200 Subject: [PATCH] switch to new grammar --- spip2md/content.py | 3 +- spip2md/spip.flex.lark | 75 ------------------------ spip2md/spip.lark | 129 +++++++++++++++++------------------------ 3 files changed, 55 insertions(+), 152 deletions(-) delete mode 100644 spip2md/spip.flex.lark diff --git a/spip2md/content.py b/spip2md/content.py index 648f877..815ffb2 100644 --- a/spip2md/content.py +++ b/spip2md/content.py @@ -2,8 +2,7 @@ from os import path from lark import Lark -# spipParser = Lark(open(path.dirname(__file__) + "/spip.lark")) -spipParser = Lark(open(path.dirname(__file__) + "/spip.flex.lark")) +spipParser = Lark(open(path.dirname(__file__) + "/spip.lark")) class content: diff --git a/spip2md/spip.flex.lark b/spip2md/spip.flex.lark deleted file mode 100644 index 3f75835..0000000 --- a/spip2md/spip.flex.lark +++ /dev/null @@ -1,75 +0,0 @@ -// Flexible SPIP Markup grammar for Lark parser -start: _N* block ( _N+ block )* _N* - -?block: paragraph - | heading - | list - | table - | tag - | HORIZONTAL_RULE -> horizontal_rule - -HORIZONTAL_RULE: /----+/ - -?list: unordered_list - | ordered_list - -unordered_list: ( _UL list_item _N )+ -ordered_list: ( _OL list_item _N )+ -list_item: _inline{TEXT}+ -_UL: /-\*|-[^#-]/ -_OL: /-#/ - -table: ( _TBL_META table_metadata "||" _N )? ( table_row _N )+ -table_metadata: table_title "|" table_description -table_title: _inline{TABLE_TEXT} -table_description: _inline{TABLE_TEXT} -table_row: ( _TBL table_cell )+ "|" -table_cell: _inline{TABLE_TEXT} -_TBL_META: "||" -_TBL: "|" - -heading: _H _inline{MARKED_TEXT}+ "}}}" -_H: "{{{" - -paragraph: ( _inline{TEXT} _N? )+ - -_inline{text}: text - | emphasis - | strong - | anchor - | tag - -TEXT: /(?:[^\r\n\{](?![^\[\n\r]*->))+/ -TABLE_TEXT: /[^\|\r\n\{]+/ -MARKED_TEXT: /[^\}\r\n\{]+/ - -strong: _B ( _inline{MARKED_TEXT} )+ ( "}}" | _N ) -emphasis: _I ( _inline{MARKED_TEXT} )+ ( "}" | _N ) -_B: /{{(?=[^\{])/ -_I: /{(?=[^\{])/ - -?anchor: anchor_footnote - | anchor_wikipedia - | anchor_normal -> anchor - -anchor_footnote: _FOOT HREF "]]" -anchor_wikipedia: _WIKI HREF "]" -anchor_normal: _A A_TEXT "->" HREF "]" -_FOOT: /\[\[/ -_WIKI: /\[\?/ -_A: /\[(?=[^\[\n\r]+->)/ -HREF: _PURE_TEXT -A_TEXT: /[^\r\n\{]+?(?=->)/ - -tag: end_tag - | start_tag - -end_tag: _ETAG TAG_NAME ( "|" TAG_OPTION )* ">" -start_tag: _STAG TAG_NAME ( "|" TAG_OPTION )* ">" -_STAG: "<" -_ETAG: " hr + | tag + | HORIZONTAL_RULE -> horizontal_rule + +HORIZONTAL_RULE: /----+/ ?list: unordered_list | ordered_list -unordered_list: ( _HYPHEN_STAR list_element _N )+ -> ul -ordered_list: ( _HYPHEN_HASH list_element _N )+ -> ol -list_element: _inline_format -> li +unordered_list: ( _UL list_item _N )+ +ordered_list: ( _OL list_item _N )+ +list_item: _inline{TEXT}+ +_UL: /-\*|-[^#-]/ +_OL: /-#/ -table: ( table_meta _N )? ( row _N )+ -> table -table_meta: _PIPE _PIPE TEXT _PIPE TEXT _PIPE _PIPE -> title_description -row: ( _PIPE cell )+ _PIPE -> tr -cell: _inline_format -> td +table: ( _TBL_META table_metadata "||" _N )? ( table_row _N )+ +table_metadata: table_title "|" table_description +table_title: _inline{TABLE_TEXT} +table_description: _inline{TABLE_TEXT} +table_row: ( _TBL table_cell )+ "|" +table_cell: _inline{TABLE_TEXT} +_TBL_META: "||" +_TBL: "|" -heading: _O_CURLY_3 ( TEXT | link | nested_italic | nested_bold ) _C_CURLY_3 -> h2 +heading: _H _inline{MARKED_TEXT}+ "}}}" +_H: "{{{" -paragraph: ( _inline_format _N? )+ -> p +paragraph: ( _inline{TEXT} _N? )+ -_inline_format: TEXT - | italic - | bold - | link - | orphan_tag - | orphan_quote - | tag +_inline{text}: text + | emphasis + | strong + | anchor + | tag -bold: _O_CURLY_2 ( TEXT | link | nested_italic )+ _C_CURLY_2 -> strong -italic: _O_CURLY ( TEXT | link | nested_bold )+ _C_CURLY -> em -nested_bold: TEXT+ _O_CURLY_2 ( TEXT | link )+ _C_CURLY_2 -> strong -nested_italic: TEXT+ _O_CURLY ( TEXT | link )+ _C_CURLY -> em +TEXT: /(?:[^\r\n\{](?![^\[\n\r]*->))+/ +TABLE_TEXT: /[^\|\r\n\{]+/ +MARKED_TEXT: /[^\}\r\n\{]+/ -?link: footnote - | wikipedia_link - | a +strong: _B ( _inline{MARKED_TEXT} )+ ( "}}" | _N ) +emphasis: _I ( _inline{MARKED_TEXT} )+ ( "}" | _N ) +_B: /{{(?=[^\{])/ +_I: /{(?=[^\{])/ -footnote: _O_SQUARE_2 HREF _C_SQUARE_2 -> footnote -wikipedia_link: _O_SQUARE_INTERO HREF _C_SQUARE -> a_wikipedia -a: _LINK_OPENING LINK_TEXT _ARROW HREF _C_SQUARE -> a +?anchor: anchor_footnote + | anchor_wikipedia + | anchor_normal -> anchor -tag: closing_quote - | opening_quote - | closing_tag - | opening_tag +anchor_footnote: _FOOT HREF "]]" +anchor_wikipedia: _WIKI HREF "]" +anchor_normal: _A A_TEXT "->" HREF "]" +_FOOT: /\[\[/ +_WIKI: /\[\?/ +_A: /\[(?=[^\[\n\r]+->)/ +HREF: _PURE_TEXT +A_TEXT: /[^\r\n\{]+?(?=->)/ -orphan_quote: _ORPHAN_OPENING "quote" _C_ANGLE -> orphan_quote -orphan_tag: _ORPHAN_OPENING TEXT ( _PIPE TEXT )* _C_ANGLE -> orphan_tag +tag: end_tag + | start_tag -closing_quote: _O_ANGLE _SLASH "quote" _C_ANGLE -> closing_quote -opening_quote: _O_ANGLE "quote" _C_ANGLE -> opening_quote -closing_tag: _O_ANGLE _SLASH TEXT ( _PIPE TEXT )* _C_ANGLE -> closing_tag -opening_tag: _O_ANGLE TEXT ( _PIPE TEXT )* _C_ANGLE -> opening_tag +end_tag: _ETAG TAG_NAME ( "|" TAG_OPTION )* ">" +start_tag: _STAG TAG_NAME ( "|" TAG_OPTION )* ">" +_STAG: "<" +_ETAG: "" -_ORPHAN_OPENING: /<(?=([^>\/]+?)>)(?!.*<\/\1>)/ - -_O_CURLY_3: "{{{" -_C_CURLY_3: "}}}" -_O_CURLY_2: "{{" -_C_CURLY_2: "}}" -_O_CURLY: "{" -_C_CURLY: "}" - -_O_SQUARE_2: "[[" -_C_SQUARE_2: "]]" -_O_SQUARE_INTERO: "[?" -_C_SQUARE: "]" -_ARROW: "->" -_LINK_OPENING: /\[(?=/ PURE_TEXT+ /]*->)/ -HREF: PURE_TEXT+ -LINK_TEXT: PURE_TEXT+ /(?=->)/ - -/// Characters that could be markup but aren’t in this situation -AMBIGUOUS_TEXT: /\[(?!.*->.*\])/ - | /\]/ - | /\\}/ -/// Every characters that have no markup meaning -PURE_TEXT: /[^\r\n\|\{\}\[\]\<\>]/ +_PURE_TEXT: /[0-9A-Za-z_:\/\-\.]+/