simplified architecture

This commit is contained in:
Guilhem Fauré 2023-05-11 09:50:18 +02:00
parent 5e86ed0ed5
commit 723a7ddeea
4 changed files with 110 additions and 144 deletions

View File

@ -1,28 +1,18 @@
import re
import yaml
from convert import convert
from slugify import slugify
from SpipDatabase import *
def clean(string):
    """Return *string* with each ``<multi>`` block reduced to its first language.

    A block looks like ``<multi>[fr]Titre[en]Title</multi>``; only the text
    that follows the first two-letter language tag is kept. Any other markup
    in *string* is returned unchanged.
    """
    # Group 1 is the first-language text: it stops at the next "[" or "<"
    first_language_only = re.compile(
        r"<multi>\s*\[[a-z]{2}\]\s*([^<\[]*)\s*[^<]*\s*<\/multi>"
    )
    return first_language_only.sub(r"\1", string)
class metadata:
class Metadata:
def __init__(self, article):
self.id = article.id_article
# self.surtitle = article.surtitre # Probably unused
self.title = clean(article.titre)
# self.title = self.clean(article.titre)
self.title = convert(article.titre)
self.subtitle = article.soustitre # Probably unused
# self.section = article.id_rubrique # TODO join
self.description = article.descriptif
self.description = convert(article.descriptif)
self.caption = article.chapo # Probably unused
self.ps = article.ps # Probably unused
self.publicationDate = article.date

View File

@ -1,124 +0,0 @@
import re
from os import path
class content:
    """Wrap a piece of SPIP markup and convert it to Markdown.

    The conversion is a fixed sequence of regular-expression substitutions
    stored in ``_mappings``. Each entry maps a rule name to a
    ``(compiled pattern, replacement)`` pair. The rules are applied in
    insertion order, which matters: e.g. ``{{{heading}}}`` has to be consumed
    before ``{{strong}}``, itself before ``{emphasis}``.
    """

    _mappings = {
        "horizontal-rule": (
            re.compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", re.S | re.I),
            r"---",
        ),
        "line-break": (
            re.compile(r"\r?\n_ *(?=\r?\n)|<br ?.*?>", re.S | re.I),
            "\n",
        ),
        "heading": (
            re.compile(r"\{\{\{ *(.*?) *\}\}\}", re.S | re.I),
            r"## \1",
        ),
        "strong": (
            re.compile(r"\{\{ *(.*?) *\}\}", re.S | re.I),
            r"**\1**",
        ),
        "emphasis": (
            re.compile(r"\{ *(.*?) *\}", re.S | re.I),
            r"*\1*",
        ),
        "strikethrough": (
            re.compile(
                r"<del>\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)",
                re.S | re.I,
            ),
            r"~\1~",
        ),
        "anchor": (
            re.compile(r"\[ *(.*?) *-> *(.*?) *\]", re.S | re.I),
            r"[\1](\2)",
        ),
        "image": (
            re.compile(r"<(?:img|image)(.*?)(\|.*?)*>", re.S | re.I),
            r"![image](\1)",
        ),
        "document-anchors": (
            re.compile(r"<(?:doc|emb)(.*?)(\|.*?)*>", re.S | re.I),
            r"[document](\1)",
        ),
        "wikilink": (
            re.compile(r"\[\? *(.*?) *\]", re.S | re.I),
            r"[\1](https://wikipedia.org/wiki/\1)",
        ),
        "footnote": (  # Footnotes are dropped from the output
            re.compile(r"\[\[ *(.*?) *\]\]", re.S | re.I),
            r"",
        ),
        "unordered-list": (
            re.compile(r"(\r?\n)-(?!#|-)\*? *", re.S | re.I),
            r"\1- ",
        ),
        "wrong-unordered-list": (
            re.compile(r"(\r?\n)\* +", re.S | re.I),
            r"\1- ",
        ),
        "ordered-list": (
            re.compile(r"(\r?\n)-# *", re.S | re.I),
            r"\g<1>1. ",
        ),
        "table-metadata": (  # Table metadata rows are dropped from the output
            re.compile(r"(\r?\n)\|\|(.*?)\|(.*?)\|\|", re.S | re.I),
            r"",
        ),
        "quote": (
            re.compile(
                r"<(?:quote|poesie)>\s*(.*?)\s*(?:(\r?\n){2,}|<\/(?:quote|poesie)>)",
                re.S | re.I,
            ),
            r"> \1\2\2",
        ),
        "box": (
            re.compile(
                r"<code>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/code>)",
                re.S | re.I,
            ),
            "`\\1`",
        ),
        "fence": (
            re.compile(
                r"<cadre>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/cadre>)",
                re.S | re.I,
            ),
            "```\n\\1\n\n```",
        ),
        "multi-language": (  # Keep only the first language
            # The tail after the first language is matched lazily so that the
            # match stops at the nearest </multi>. A greedy tail (with re.S)
            # ran to the LAST </multi> of the text and swallowed everything
            # located between two separate <multi> blocks.
            re.compile(
                r"<multi>\s*\[.{2,4}\]\s*(.*?)\s*(?:\[.{2,4}\].*?)?<\/multi>",
                re.S | re.I,
            ),
            r"\1",
        ),
    }

    def __init__(self, spip):
        """Store the raw SPIP markup *spip* (a string) for later conversion."""
        self.markup = spip

    def get_markdown(self):
        """Apply every rule of ``_mappings`` in order and return the result.

        The converted text is also stored back into ``self.markup``.
        """
        for spip, markdown in self._mappings.values():
            self.markup = spip.sub(markdown, self.markup)
        # UTF-8 round trip kept as is; it leaves well-formed text unchanged
        return self.markup.encode("utf-8").decode("utf-8")
# Converts a SPIP file & displays the resulting Markdown
def test(filename):
    """Convert the SPIP file *filename* to Markdown and print the result.

    *filename* is resolved relative to the directory containing this module.
    """
    # path.join (rather than gluing with "/") keeps the path relative when
    # dirname(__file__) is empty instead of turning it into "/<filename>"
    spip_path = path.join(path.dirname(__file__), filename)
    # The context manager closes the file even if reading fails
    with open(spip_path) as spip_file:
        raw = spip_file.read()
    print(f"--- Conversion of {filename} ---\n\n")
    c = content(raw)
    print(c.get_markdown())
if __name__ == "__main__":
    # Manual check: convert and print each sample SPIP file, in order
    sample_files = (
        "../test/0.spip",
        "../test/1.spip",
        "../test/2.spip",
        "../test/3.spip",
        "../test/4.spip",
    )
    for sample in sample_files:
        test(sample)

101
spip2md/convert.py Normal file
View File

@ -0,0 +1,101 @@
import re
# SPIP -> Markdown conversion rules: rule name -> (compiled pattern, replacement).
# Rules are applied in insertion order, which matters: e.g. {{{heading}}} has
# to be consumed before {{strong}}, itself before {emphasis}.
mappings = {
    "horizontal-rule": (
        re.compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", re.S | re.I),
        r"---",
    ),
    "line-break": (
        re.compile(r"\r?\n_ *(?=\r?\n)|<br ?.*?>", re.S | re.I),
        "\n",
    ),
    "heading": (
        re.compile(r"\{\{\{ *(.*?) *\}\}\}", re.S | re.I),
        r"## \1",
    ),
    "strong": (
        re.compile(r"\{\{ *(.*?) *\}\}", re.S | re.I),
        r"**\1**",
    ),
    "emphasis": (
        re.compile(r"\{ *(.*?) *\}", re.S | re.I),
        r"*\1*",
    ),
    "strikethrough": (
        re.compile(
            r"<del>\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)",
            re.S | re.I,
        ),
        r"~\1~",
    ),
    "anchor": (
        re.compile(r"\[ *(.*?) *-> *(.*?) *\]", re.S | re.I),
        r"[\1](\2)",
    ),
    "image": (
        re.compile(r"<(?:img|image)(.*?)(\|.*?)*>", re.S | re.I),
        r"![image](\1)",
    ),
    "document-anchors": (
        re.compile(r"<(?:doc|emb)(.*?)(\|.*?)*>", re.S | re.I),
        r"[document](\1)",
    ),
    "wikilink": (
        re.compile(r"\[\? *(.*?) *\]", re.S | re.I),
        r"[\1](https://wikipedia.org/wiki/\1)",
    ),
    "footnote": (  # Footnotes are dropped from the output
        re.compile(r"\[\[ *(.*?) *\]\]", re.S | re.I),
        r"",
    ),
    "unordered-list": (
        re.compile(r"(\r?\n)-(?!#|-)\*? *", re.S | re.I),
        r"\1- ",
    ),
    "wrong-unordered-list": (
        re.compile(r"(\r?\n)\* +", re.S | re.I),
        r"\1- ",
    ),
    "ordered-list": (
        re.compile(r"(\r?\n)-# *", re.S | re.I),
        r"\g<1>1. ",
    ),
    "table-metadata": (  # Table metadata rows are dropped from the output
        re.compile(r"(\r?\n)\|\|(.*?)\|(.*?)\|\|", re.S | re.I),
        r"",
    ),
    "quote": (
        re.compile(
            r"<(?:quote|poesie)>\s*(.*?)\s*(?:(\r?\n){2,}|<\/(?:quote|poesie)>)",
            re.S | re.I,
        ),
        r"> \1\2\2",
    ),
    "box": (
        re.compile(
            r"<code>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/code>)",
            re.S | re.I,
        ),
        "`\\1`",
    ),
    "fence": (
        re.compile(
            r"<cadre>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/cadre>)",
            re.S | re.I,
        ),
        "```\n\\1\n\n```",
    ),
    "multi-language": (  # Keep only the first language
        # The tail after the first language is matched lazily so that the
        # match stops at the nearest </multi>. A greedy tail (with re.S) ran
        # to the LAST </multi> of the text and swallowed everything located
        # between two separate <multi> blocks.
        re.compile(
            r"<multi>\s*\[.{2,4}\]\s*(.*?)\s*(?:\[.{2,4}\].*?)?<\/multi>",
            re.S | re.I,
        ),
        r"\1",
    ),
}
def convert(markup):
    """Return *markup* (SPIP text) converted to Markdown.

    Every ``(pattern, replacement)`` pair of ``mappings`` is applied in turn,
    each substitution working on the output of the previous one.
    """
    converted = markup
    for pattern, replacement in mappings.values():
        converted = pattern.sub(replacement, converted)
    # UTF-8 round trip kept as is; it leaves well-formed text unchanged
    utf8_bytes = converted.encode("utf-8")
    return utf8_bytes.decode("utf-8")

View File

@ -4,8 +4,8 @@ from os import mkdir
from shutil import rmtree
from config import CONFIG
from content import content
from metadata import metadata
from convert import convert
from Metadata import Metadata
from SpipDatabase import *
# Clean the output dir & create a new
@ -39,12 +39,11 @@ for exported in range(nbToExport):
if exported > 0 and exported % 10 == 0:
print(f"\n--- {nbToExport - exported} articles remaining ---\n")
article = articles[exported]
meta = metadata(article)
meta = Metadata(article)
print(f"{exported+1}. Exporting {meta.title}")
print(f" to {meta.get_slug()}/index.md")
articleDir = "{}/{}".format(CONFIG["outputDir"], meta.get_slug())
body = content(article.texte)
mkdir(articleDir)
with open("{}/index.md".format(articleDir), "w") as f:
@ -52,7 +51,7 @@ for exported in range(nbToExport):
"{}\n\n{}\n{}\n{}".format(
meta.get_frontmatter(),
meta.get_starting(),
body.get_markdown(),
convert(article.texte),
meta.get_ending(),
)
)