Delete the Lark grammar for SPIP markup and restore article title conversion

This commit is contained in:
Guilhem Fauré 2023-05-11 13:45:50 +02:00
parent ca4a3c1a96
commit 3e3259c564
2 changed files with 4 additions and 110 deletions

View File

@ -8,8 +8,7 @@ class metadata:
def __init__(self, article):
self.id = article.id_article
# self.surtitle = article.surtitre # Probably unused
self.title = "title"
# self.title = convert(article.titre)
self.title = convert(article.titre)
self.subtitle = article.soustitre # Probably unused
# self.section = article.id_rubrique # TODO join
self.description = convert(article.descriptif)
@ -63,7 +62,9 @@ class metadata:
def get_starting(self):
return (
# f"{self.caption}\n" if len(self.caption) > 0 else "" + f"# {self.title}\n"
f"{self.caption}\n" if len(self.caption) > 0 else ""
f"{self.caption}\n"
if len(self.caption) > 0
else ""
)
# Contains things after the article like ps & extra

View File

@ -1,107 +0,0 @@
// Flexible SPIP Markup grammar for Lark parser
//
// Reading notes (standard Lark semantics): terminals whose name starts with "_"
// are filtered out of the parse tree, rules whose name starts with "_" are
// inlined into their parent, rules prefixed with "?" are inlined when they have
// a single child, and name{...} instantiates a rule template.

// A document is a sequence of blocks separated by one or more newlines, with
// optional leading and trailing newlines.
start: _N* block ( _N+ block )* _N*
// A block is a horizontal rule, a heading, a list, a table, a block-level tag,
// or a paragraph whose first text run uses PARAGRAPH_TEXT and later runs TEXT.
?block: HORIZONTAL_RULE
| heading
| list
| table
| _block_tag
| paragraph{PARAGRAPH_TEXT, TEXT}
// At least four dashes, optionally separated by spaces, followed by any mix of
// dashes and spaces.
HORIZONTAL_RULE: /- *- *- *- *[\- ]*/
// Lists: one or more items, each introduced by its marker and separated by
// newlines; the newline after the last item is optional.
?list: unordered_list
| ordered_list
unordered_list: ( _UL list_item _N )* _UL list_item _N?
ordered_list: ( _OL list_item _N )* _OL list_item _N?
list_item: _inline{TEXT}+
// Unordered marker: "-*", or a "-" that is not followed by "#" or "---".
_UL: /-\*|-(?!#|---)/
// Ordered marker: "-#".
_OL: /-#/
// Table: an optional metadata line ("||title|description||") followed by one
// or more rows, each terminated by a newline.
table: ( _TBL_META table_metadata "||" _N )? ( table_row _N )+
table_metadata: table_title "|" table_description
table_title: _inline{TABLE_TEXT}
table_description: _inline{TABLE_TEXT}
// A row is one or more "|cell" groups closed by a final "|".
table_row: ( _TBL table_cell )+ "|"
table_cell: _inline{TABLE_TEXT}
// "||" only when another "||" appears later on the same line.
_TBL_META: /\|\|(?=[^\r\n]+\|\|)/
// "|" only when another "|" appears later on the same line.
_TBL: /\|(?=[^\r\n]+\|)/
// Heading: content wrapped in "{{{" ... "}}}".
heading: _H paragraph{MARKED_TEXT, MARKED_TEXT} "}}}"
_H: "{{{"
// Block-level tags: either a pair with a closing tag, or an orphan without one.
_block_tag: pair_block_tag
| orphan_block_tag
// "<name options>" + optional nested document + "</name>"; emitted as "tag".
pair_block_tag: _PAIR_TAG_ANGLE TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" start? "</" _PURE_TEXT ">" -> tag
// "<name options>" (optionally "</name ...>") alone; emitted as "orphan_tag".
orphan_block_tag: _ORPHAN_TAG_ANGLE "/"? TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" -> orphan_tag
// "<" only when the lookahead finds an alphanumeric tag name, a ">" with no
// "{}<>[]" characters before it, and later a closing tag with the same name
// (backreference \1). Case-insensitive.
_PAIR_TAG_ANGLE: /<(?=([0-9a-z]+)[^{}<>\[\]]*>[\s\S]*<\/\1>)/i
// "<" of a well-formed tag for which no closing tag with the same name exists
// anywhere later in the input. Case-insensitive.
_ORPHAN_TAG_ANGLE: /<(?=([0-9a-z]+)[^{}<>\[\]]*>)(?![\s\S]*\<\/\1\>)/i
// Paragraph template: the first inline element is built on the "begin" text
// terminal; the following ones are built on "text" or are inline tags. Each
// element may be followed by a single newline.
paragraph{begin, text}: _inline{begin} _N? ( ( _inline{text} | _inline_tag ) _N? )*
// Inline template: a link, emphasis, strong, or the given text terminal.
_inline{text}: _link
| emphasis
| strong
| text
// Inline tags: paired, short (opening tag without a closing tag), or orphan.
_inline_tag: pair_inline_tag
| short_inline_tag
| orphan_inline_tag
// "<name options>" + paragraph + "</name>"; emitted as "tag".
pair_inline_tag: _INLINE_TAG_ANGLE TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" _N? paragraph{TEXT, TEXT} "</" _PURE_TEXT ">" -> tag
// "<name options>" + paragraph, with no closing tag; also emitted as "tag".
short_inline_tag: _SHORT_INLINE_TAG_ANGLE TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" _N? paragraph{TEXT, TEXT} -> tag
// "<img|emb|doc ...>" alone; emitted as "orphan_tag".
orphan_inline_tag: _ORPHAN_INLINE_TAG_ANGLE TAG_NAME ( _OPTION_SEP TAG_OPTION )* ">" -> orphan_tag
// "<" of a tag that has a same-name closing tag later, with at least one
// character between the two. Case-insensitive.
_INLINE_TAG_ANGLE: /<(?=([0-9a-z]+)[0-9a-z_:|,="\/\-\. ]*>[\s\S]+<\/\1>)/i
// "<" of a tag that is not a closing tag ("/"), does not start with img, emb
// or doc, and has no same-name closing tag later. Case-insensitive.
_SHORT_INLINE_TAG_ANGLE: /<(?=([0-9a-z]+)[0-9a-z_:,="\/\.\|\- ]*>)(?!\/|img|emb|doc|[\s\S]+<\/\1>)/i
// "<" of an img, emb or doc tag. Case-insensitive.
_ORPHAN_INLINE_TAG_ANGLE: /<(?=(img|emb|doc)[0-9a-z\|_:,="\/\.\|\- ]*>)/i
// Links: footnote "[[...]]", wikilink "[?...]", or anchor "[text->href]".
_link: footnote
| wikilink
| anchor
footnote: _FOOT FOOTNOTE_CONTENT "]]"
wikilink: _WIKI HREF "]"
// Spaces are allowed around the href, after the "->" arrow.
anchor: _A A_TEXT "->" " "* HREF " "* "]"
_FOOT: /\[\[/
_WIKI: /\[\?/
// "[" only when a "->" follows before any other "[" or a line break.
_A: /\[(?=[^\[\n\r]+->)/
// Letters, digits, "_", ":", "/", "-", "." and spaces.
FOOTNOTE_CONTENT: /[0-9A-Za-z_:\/\-\.\ ]+/
HREF: _PURE_TEXT | _PLACEHOLDER
// Anchor label: shortest run without line breaks or "{" that ends right
// before "->".
A_TEXT: /[^\r\n\{]+?(?=->)/
// Strong "{{...}}" and emphasis "{...}"; both may also be closed by a newline
// instead of their closing braces.
strong: _STRONG ( _inline{MARKED_TEXT} )+ ( "}}" | _N )
emphasis: _EM ( _inline{MARKED_TEXT} )+ ( "}" | _N )
// "{{" not followed by a third "{".
_STRONG: /{{(?=[^\{])/
// "{" not followed by another "{".
_EM: /{(?=[^\{])/
// Text that may start a paragraph (verbose regex, "x" flag). Alternatives:
//   1. a first character that is not a line break, "|", "-", "{" or "<",
//      followed by characters other than line breaks, "{" and "<", each of
//      which must not be followed by "...->", "?...]" or "[...]]"
//      (presumably so the text stops right before a link's opening bracket);
//   2. one or more "<" that do not begin a tag;
//   3. a "|" followed by a character that is neither a line break nor "|".
PARAGRAPH_TEXT: / [^\r\n|\-{<]
(?:[^\r\n{<](?!
[^\[\r\n]*->
|\?[^\[\r\n]*\]
|\[[^\[\r\n]*\]\]
))*
| (?:\<(?![0-9A-Za-z_:|,=\/\-\. ]+\>))+
| \|(?=[^\r\n|])
/x
// General inline text (verbose regex, "x" flag). Same link lookahead as
// PARAGRAPH_TEXT but with no restriction on the first character; also accepts
// "<" characters that do not begin a tag, or a backslash followed by "{".
TEXT: / (?:[^\r\n{<](?!
[^\[\r\n]*->
|\?[^\[\r\n]*\]
|\[[^\[\r\n]*\]\]
))+
| (?:\<(?![0-9A-Za-z_:|,=\/\-\. ]+\>))+
| \\{
/x
// Text inside table cells: no "|", line breaks or "{", and no character that
// is followed by a "->" on the same line before a "[".
TABLE_TEXT: /(?:[^\|\r\n\{](?![^\[\n\r]*->))+/
// Text inside headings, strong and emphasis: no "}", line breaks or "{", and
// the same "->" lookahead as TABLE_TEXT.
MARKED_TEXT: /(?:[^\}\r\n\{](?![^\[\n\r]*->))+/
TAG_NAME: _PURE_TEXT
TAG_OPTION: _PURE_TEXT
// Tag options are separated by a "|" with optional surrounding spaces, or by
// one or more spaces.
_OPTION_SEP: " "* "|" " "* | " "+
// Line break, with an optional carriage return.
_N: /\r?\n/
// Run of letters, digits and the punctuation _ : , = ? " / - .
_PURE_TEXT: /[0-9A-Za-z_:,=?"\/\-\.]+/
// One or more "*" characters.
_PLACEHOLDER: /\*+/