From 3e3259c564c8133dd624d0566b5f97cb7bf63e5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilhem=20Faur=C3=A9?= Date: Thu, 11 May 2023 13:45:50 +0200 Subject: [PATCH] delete lark syntax --- spip2md/Metadata.py | 7 +-- spip2md/spip.lark | 107 -------------------------------------------- 2 files changed, 4 insertions(+), 110 deletions(-) delete mode 100644 spip2md/spip.lark diff --git a/spip2md/Metadata.py b/spip2md/Metadata.py index acba627..68a3a5c 100644 --- a/spip2md/Metadata.py +++ b/spip2md/Metadata.py @@ -8,8 +8,7 @@ class metadata: def __init__(self, article): self.id = article.id_article # self.surtitle = article.surtitre # Probably unused - self.title = "title" - # self.title = convert(article.titre) + self.title = convert(article.titre) self.subtitle = article.soustitre # Probably unused # self.section = article.id_rubrique # TODO join self.description = convert(article.descriptif) @@ -63,7 +62,9 @@ class metadata: def get_starting(self): return ( # f"{self.caption}\n" if len(self.caption) > 0 else "" + f"# {self.title}\n" - f"{self.caption}\n" if len(self.caption) > 0 else "" + f"{self.caption}\n" + if len(self.caption) > 0 + else "" ) # Contains things after the article like ps & extra diff --git a/spip2md/spip.lark b/spip2md/spip.lark deleted file mode 100644 index c585bed..0000000 --- a/spip2md/spip.lark +++ /dev/null @@ -1,107 +0,0 @@ -// Flexible SPIP Markup grammar for Lark parser -start: _N* block ( _N+ block )* _N* - -?block: HORIZONTAL_RULE - | heading - | list - | table - | _block_tag - | paragraph{PARAGRAPH_TEXT, TEXT} - -HORIZONTAL_RULE: /- *- *- *- *[\- ]*/ - -?list: unordered_list - | ordered_list - -unordered_list: ( _UL list_item _N )* _UL list_item _N? -ordered_list: ( _OL list_item _N )* _OL list_item _N? -list_item: _inline{TEXT}+ -_UL: /-\*|-(?!#|---)/ -_OL: /-#/ - -table: ( _TBL_META table_metadata "||" _N )? ( table_row _N )+ -table_metadata: table_title "|" table_description -table_title: _inline{TABLE_TEXT} -table_description: _inline{TABLE_TEXT} -table_row: ( _TBL table_cell )+ "|" -table_cell: _inline{TABLE_TEXT} -_TBL_META: /\|\|(?=[^\r\n]+\|\|)/ -_TBL: /\|(?=[^\r\n]+\|)/ - -heading: _H paragraph{MARKED_TEXT, MARKED_TEXT} "}}}" -_H: "{{{" - -_block_tag: pair_block_tag - | orphan_block_tag - -pair_block_tag: _PAIR_TAG_ANGLE TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" start? "" -> tag -orphan_block_tag: _ORPHAN_TAG_ANGLE "/"? TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" -> orphan_tag - -_PAIR_TAG_ANGLE: /<(?=([0-9a-z]+)[^{}<>\[\]]*>[\s\S]*<\/\1>)/i -_ORPHAN_TAG_ANGLE: /<(?=([0-9a-z]+)[^{}<>\[\]]*>)(?![\s\S]*\<\/\1\>)/i - -paragraph{begin, text}: _inline{begin} _N? ( ( _inline{text} | _inline_tag ) _N? )* - -_inline{text}: _link - | emphasis - | strong - | text - -_inline_tag: pair_inline_tag - | short_inline_tag - | orphan_inline_tag - -pair_inline_tag: _INLINE_TAG_ANGLE TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" _N? paragraph{TEXT, TEXT} "" -> tag -short_inline_tag: _SHORT_INLINE_TAG_ANGLE TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" _N? paragraph{TEXT, TEXT} -> tag -orphan_inline_tag: _ORPHAN_INLINE_TAG_ANGLE TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" -> orphan_tag - -_INLINE_TAG_ANGLE: /<(?=([0-9a-z]+)[0-9a-z_:|,="\/\-\. ]*>[\s\S]+<\/\1>)/i -_SHORT_INLINE_TAG_ANGLE: /<(?=([0-9a-z]+)[0-9a-z_:,="\/\.\|\- ]*>)(?!\/|img|emb|doc|[\s\S]+<\/\1>)/i -_ORPHAN_INLINE_TAG_ANGLE: /<(?=(img|emb|doc)[0-9a-z\|_:,="\/\.\|\- ]*>)/i - -_link: footnote - | wikilink - | anchor - -footnote: _FOOT FOOTNOTE_CONTENT "]]" -wikilink: _WIKI HREF "]" -anchor: _A A_TEXT "->" " "* HREF " "* "]" -_FOOT: /\[\[/ -_WIKI: /\[\?/ -_A: /\[(?=[^\[\n\r]+->)/ -FOOTNOTE_CONTENT: /[0-9A-Za-z_:\/\-\.\ ]+/ -HREF: _PURE_TEXT | _PLACEHOLDER -A_TEXT: /[^\r\n\{]+?(?=->)/ - -strong: _STRONG ( _inline{MARKED_TEXT} )+ ( "}}" | _N ) -emphasis: _EM ( _inline{MARKED_TEXT} )+ ( "}" | _N ) -_STRONG: /{{(?=[^\{])/ -_EM: /{(?=[^\{])/ - -PARAGRAPH_TEXT: / [^\r\n|\-{<] - (?:[^\r\n{<](?! - [^\[\r\n]*-> - |\?[^\[\r\n]*\] - |\[[^\[\r\n]*\]\] - ))* - | (?:\<(?![0-9A-Za-z_:|,=\/\-\. ]+\>))+ - | \|(?=[^\r\n|]) - /x - -TEXT: / (?:[^\r\n{<](?! - [^\[\r\n]*-> - |\?[^\[\r\n]*\] - |\[[^\[\r\n]*\]\] - ))+ - | (?:\<(?![0-9A-Za-z_:|,=\/\-\. ]+\>))+ - | \\{ - /x -TABLE_TEXT: /(?:[^\|\r\n\{](?![^\[\n\r]*->))+/ -MARKED_TEXT: /(?:[^\}\r\n\{](?![^\[\n\r]*->))+/ -TAG_NAME: _PURE_TEXT -TAG_OPTION: _PURE_TEXT - -_OPTION_SEP: " "* "|" " "* | " "+ -_N: /\r?\n/ -_PURE_TEXT: /[0-9A-Za-z_:,=?"\/\-\.]+/ -_PLACEHOLDER: /\*+/