diff --git a/spip2md/metadata.py b/spip2md/Metadata.py similarity index 88% rename from spip2md/metadata.py rename to spip2md/Metadata.py index c5bd014..3116e91 100644 --- a/spip2md/metadata.py +++ b/spip2md/Metadata.py @@ -1,28 +1,18 @@ -import re - import yaml +from convert import convert from slugify import slugify from SpipDatabase import * -def clean(string): - cleaned = string - # Keep only the first lang of multilingual titles - cleaned = re.sub( - r"\s*\[[a-z]{2}\]\s*([^<\[]*)\s*[^<]*\s*<\/multi>", r"\1", cleaned - ) - # cleaned = re.sub(r"<[^\>]*>", r"", cleaned) - return cleaned - - -class metadata: +class Metadata: def __init__(self, article): self.id = article.id_article # self.surtitle = article.surtitre # Probably unused - self.title = clean(article.titre) + # self.title = self.clean(article.titre) + self.title = convert(article.titre) self.subtitle = article.soustitre # Probably unused # self.section = article.id_rubrique # TODO join - self.description = article.descriptif + self.description = convert(article.descriptif) self.caption = article.chapo # Probably unused self.ps = article.ps # Probably unused self.publicationDate = article.date diff --git a/spip2md/content.py b/spip2md/content.py deleted file mode 100644 index e8d38bb..0000000 --- a/spip2md/content.py +++ /dev/null @@ -1,124 +0,0 @@ -import re -from os import path - - -class content: - _mappings = { - "horizontal-rule": ( - re.compile(r"- ?- ?- ?- ?[\- ]*|
", re.S | re.I), - r"---", - ), - "line-break": ( - re.compile(r"\r?\n_ *(?=\r?\n)|
", re.S | re.I), - "\n", - ), - "heading": ( - re.compile(r"\{\{\{ *(.*?) *\}\}\}", re.S | re.I), - r"## \1", - ), - "strong": ( - re.compile(r"\{\{ *(.*?) *\}\}", re.S | re.I), - r"**\1**", - ), - "emphasis": ( - re.compile(r"\{ *(.*?) *\}", re.S | re.I), - r"*\1*", - ), - "strikethrough": ( - re.compile( - r"\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)", - re.S | re.I, - ), - r"~\1~", - ), - "anchor": ( - re.compile(r"\[ *(.*?) *-> *(.*?) *\]", re.S | re.I), - r"[\1](\2)", - ), - "image": ( - re.compile(r"<(?:img|image)(.*?)(\|.*?)*>", re.S | re.I), - r"![image](\1)", - ), - "document-anchors": ( - re.compile(r"<(?:doc|emb)(.*?)(\|.*?)*>", re.S | re.I), - r"[document](\1)", - ), - "wikilink": ( - re.compile(r"\[\? *(.*?) *\]", re.S | re.I), - r"[\1](https://wikipedia.org/wiki/\1)", - ), - "footnote": ( - re.compile(r"\[\[ *(.*?) *\]\]", re.S | re.I), - r"", - ), - "unordered-list": ( - re.compile(r"(\r?\n)-(?!#|-)\*? *", re.S | re.I), - r"\1- ", - ), - "wrong-unordered-list": ( - re.compile(r"(\r?\n)\* +", re.S | re.I), - r"\1- ", - ), - "ordered-list": ( - re.compile(r"(\r?\n)-# *", re.S | re.I), - r"\g<1>1. ", - ), - "table-metadata": ( - re.compile(r"(\r?\n)\|\|(.*?)\|(.*?)\|\|", re.S | re.I), - r"", - ), - "quote": ( - re.compile( - r"<(?:quote|poesie)>\s*(.*?)\s*(?:(\r?\n){2,}|<\/(?:quote|poesie)>)", - re.S | re.I, - ), - r"> \1\2\2", - ), - "box": ( - re.compile( - r"\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/code>)", - re.S | re.I, - ), - "`\\1`", - ), - "fence": ( - re.compile( - r"\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/cadre>)", - re.S | re.I, - ), - "```\n\\1\n\n```", - ), - "multi-language": ( # Keep only the first language - re.compile( - r"\s*\[.{2,4}\]\s*(.*?)\s*(?:\s*\[.{2,4}\].*)*<\/multi>", - re.S | re.I, - ), - r"\1", - ), - } - - def __init__(self, spip): - self.markup = spip - - def get_markdown(self): - for spip, markdown in self._mappings.values(): - self.markup = spip.sub(markdown, self.markup) - return self.markup.encode("utf-8").decode("utf-8") - - -# Parses a file & display its parse tree -def test(filename): - raw = open(path.dirname(__file__) + "/" + filename).read() - - print(f"--- Conversion of {filename} ---\n\n") - c = content(raw) - print(c.get_markdown()) - - -if __name__ == "__main__": - # Test - test("../test/0.spip") - test("../test/1.spip") - test("../test/2.spip") - test("../test/3.spip") - test("../test/4.spip") diff --git a/spip2md/convert.py b/spip2md/convert.py new file mode 100644 index 0000000..9b63080 --- /dev/null +++ b/spip2md/convert.py @@ -0,0 +1,101 @@ +import re + +mappings = { + "horizontal-rule": ( + re.compile(r"- ?- ?- ?- ?[\- ]*|
", re.S | re.I), + r"---", + ), + "line-break": ( + re.compile(r"\r?\n_ *(?=\r?\n)|
", re.S | re.I), + "\n", + ), + "heading": ( + re.compile(r"\{\{\{ *(.*?) *\}\}\}", re.S | re.I), + r"## \1", + ), + "strong": ( + re.compile(r"\{\{ *(.*?) *\}\}", re.S | re.I), + r"**\1**", + ), + "emphasis": ( + re.compile(r"\{ *(.*?) *\}", re.S | re.I), + r"*\1*", + ), + "strikethrough": ( + re.compile( + r"\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)", + re.S | re.I, + ), + r"~\1~", + ), + "anchor": ( + re.compile(r"\[ *(.*?) *-> *(.*?) *\]", re.S | re.I), + r"[\1](\2)", + ), + "image": ( + re.compile(r"<(?:img|image)(.*?)(\|.*?)*>", re.S | re.I), + r"![image](\1)", + ), + "document-anchors": ( + re.compile(r"<(?:doc|emb)(.*?)(\|.*?)*>", re.S | re.I), + r"[document](\1)", + ), + "wikilink": ( + re.compile(r"\[\? *(.*?) *\]", re.S | re.I), + r"[\1](https://wikipedia.org/wiki/\1)", + ), + "footnote": ( + re.compile(r"\[\[ *(.*?) *\]\]", re.S | re.I), + r"", + ), + "unordered-list": ( + re.compile(r"(\r?\n)-(?!#|-)\*? *", re.S | re.I), + r"\1- ", + ), + "wrong-unordered-list": ( + re.compile(r"(\r?\n)\* +", re.S | re.I), + r"\1- ", + ), + "ordered-list": ( + re.compile(r"(\r?\n)-# *", re.S | re.I), + r"\g<1>1. ", + ), + "table-metadata": ( + re.compile(r"(\r?\n)\|\|(.*?)\|(.*?)\|\|", re.S | re.I), + r"", + ), + "quote": ( + re.compile( + r"<(?:quote|poesie)>\s*(.*?)\s*(?:(\r?\n){2,}|<\/(?:quote|poesie)>)", + re.S | re.I, + ), + r"> \1\2\2", + ), + "box": ( + re.compile( + r"\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/code>)", + re.S | re.I, + ), + "`\\1`", + ), + "fence": ( + re.compile( + r"\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/cadre>)", + re.S | re.I, + ), + "```\n\\1\n\n```", + ), + "multi-language": ( # Keep only the first language + re.compile( + r"\s*\[.{2,4}\]\s*(.*?)\s*(?:\s*\[.{2,4}\].*)*<\/multi>", + re.S | re.I, + ), + r"\1", + ), +} + + +def convert(markup): + for spip, markdown in mappings.values(): + markup = spip.sub(markdown, markup) + return markup.encode("utf-8").decode("utf-8") diff --git a/spip2md/spip2md.py b/spip2md/spip2md.py index a182f33..6dd3b42 100755 --- a/spip2md/spip2md.py +++ b/spip2md/spip2md.py @@ -4,8 +4,8 @@ from os import mkdir from shutil import rmtree from config import CONFIG -from content import content -from metadata import metadata +from convert import convert +from Metadata import Metadata from SpipDatabase import * # Clean the output dir & create a new @@ -39,12 +39,11 @@ for exported in range(nbToExport): if exported > 0 and exported % 10 == 0: print(f"\n--- {nbToExport - exported} articles remaining ---\n") article = articles[exported] - meta = metadata(article) + meta = Metadata(article) print(f"{exported+1}. Exporting {meta.title}") print(f" to {meta.get_slug()}/index.md") articleDir = "{}/{}".format(CONFIG["outputDir"], meta.get_slug()) - body = content(article.texte) mkdir(articleDir) with open("{}/index.md".format(articleDir), "w") as f: @@ -52,7 +51,7 @@ for exported in range(nbToExport): "{}\n\n{}\n{}\n{}".format( meta.get_frontmatter(), meta.get_starting(), - body.get_markdown(), + convert(article.texte), meta.get_ending(), ) )