From c399439e320aca309a6feb3058149e243ccaf325 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guilhem=20Faur=C3=A9?=
Date: Tue, 25 Apr 2023 15:50:08 +0200
Subject: [PATCH] lark syntax in separate file

---
Review notes (hand-edited patch; the blob ids on the index lines are stale):
 - content.py closes the grammar file after Lark has read it and opens it
   as UTF-8.
 - spip.lark: the ordered list regexps get valid /.../ delimiters, the
   unordered list regexps match a literal "*", and the second "footnote"
   rule is named "glossary", which the "link" rule refers to.

 spip2md/content.py | 64 ++++++----------------------------------------------
 spip2md/spip.lark  | 54 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 56 deletions(-)
 create mode 100644 spip2md/spip.lark

diff --git a/spip2md/content.py b/spip2md/content.py
index dee683e..f9d5daa 100644
--- a/spip2md/content.py
+++ b/spip2md/content.py
@@ -1,61 +1,13 @@
+from os import path
+
 from lark import Lark
 
-spipParser = Lark(
-    r"""
-    section: /\n\r?/
-             ( paragraph
-             | heading
-             | list
-             | table
-             | quote
-             | SEPARATOR
-             ) /\n\r?/
-
-    paragraph: format_text
-
-    heading: "{{{" format_text "}}}"
-
-    list: unordered_list
-        | unordered_sublist
-        | ordered_list
-        | ordered_sublist
-
-    unordered_list: (/\n\r?-* / format_text)+
-    unordered_sublist: (/\n\r?-*{2,7} / format_text)+
-    ordered_list: (\/n/r?-# / format_text)+
-    ordered_sublist: (\/n/r?-#{2,7} / format_text)+
-
-    table: row+
-    row: /\n\r?\|/ cell+
-    cell: format_text "|"
-
-    quote: "<quote>" format_text "</quote>"
-
-    format_text: (
-                 | link
-                 | PURE_TEXT
-                 )+
-
-    italic: "{" PURE_TEXT "}"
-    bold: "{{" PURE_TEXT "}}"
-    bold_italic: "{{ {" PURE_TEXT "} }}" | "{ {{" PURE_TEXT "}} }"
-
-    link: internal_link
-        | external_link
-        | footnote
-        | glossary
-
-    internal_link: "[" PURE_TEXT "->" PURE_TEXT "]"
-    external_link: "[" PURE_TEXT "->" /[a-z]{3,6}:\/\// PURE_TEXT "]"
-    footnote: "[[" PURE_TEXT "]]"
-    footnote: "[?" PURE_TEXT "]"
-
-    PURE_TEXT: /[^\s\{\-\|\<\[\}\>\]][^\n\r\{\<\[\}\]]*/
-
-    SEPARATOR: /-{4,}/
-""",
-    start="section",
-)
+# Load the SPIP grammar stored next to this module in spip.lark. The context
+# manager closes the file once Lark has read it; UTF-8 is set explicitly so
+# parsing does not depend on the platform's default encoding.
+_GRAMMAR_PATH = path.join(path.dirname(__file__), "spip.lark")
+with open(_GRAMMAR_PATH, encoding="utf-8") as _grammar_file:
+    spipParser = Lark(_grammar_file, start="section")
 
 
 class content:
diff --git a/spip2md/spip.lark b/spip2md/spip.lark
new file mode 100644
index 0000000..4d19c25
--- /dev/null
+++ b/spip2md/spip.lark
@@ -0,0 +1,54 @@
+// SPIP markup grammar. content.py loads this file with start="section".
+
+section: /\n\r?/
+         ( paragraph
+         | heading
+         | list
+         | table
+         | quote
+         | SEPARATOR
+         ) /\n\r?/
+
+paragraph: format_text
+
+heading: "{{{" format_text "}}}"
+
+list: unordered_list
+    | unordered_sublist
+    | ordered_list
+    | ordered_sublist
+
+// "\*" matches a literal asterisk; a bare "*" would repeat the preceding "-".
+unordered_list: (/\n\r?-\* / format_text)+
+unordered_sublist: (/\n\r?-\*{2,7} / format_text)+
+ordered_list: (/\n\r?-# / format_text)+
+ordered_sublist: (/\n\r?-#{2,7} / format_text)+
+
+table: row+
+row: /\n\r?\|/ cell+
+cell: format_text "|"
+
+quote: "<quote>" format_text "</quote>"
+
+format_text: ( link
+             | PURE_TEXT
+             )+
+
+italic: "{" PURE_TEXT "}"
+bold: "{{" PURE_TEXT "}}"
+bold_italic: "{{ {" PURE_TEXT "} }}" | "{ {{" PURE_TEXT "}} }"
+
+link: internal_link
+    | external_link
+    | footnote
+    | glossary
+
+internal_link: "[" PURE_TEXT "->" PURE_TEXT "]"
+external_link: "[" PURE_TEXT "->" /[a-z]{3,6}:\/\// PURE_TEXT "]"
+footnote: "[[" PURE_TEXT "]]"
+// Glossary links get their own rule so the "glossary" alternative of link resolves.
+glossary: "[?" PURE_TEXT "]"
+
+PURE_TEXT: /[^\s\{\-\|\<\[\}\>\]][^\n\r\{\<\[\}\]]*/
+
+SEPARATOR: /-{4,}/