simplified architecture

This commit is contained in:
Guilhem Fauré 2023-05-11 09:50:18 +02:00
parent 5e86ed0ed5
commit 723a7ddeea
4 changed files with 110 additions and 144 deletions

View File

@ -1,28 +1,18 @@
import re
import yaml import yaml
from convert import convert
from slugify import slugify from slugify import slugify
from SpipDatabase import * from SpipDatabase import *
def clean(string): class Metadata:
cleaned = string
# Keep only the first lang of multilingual titles
cleaned = re.sub(
r"<multi>\s*\[[a-z]{2}\]\s*([^<\[]*)\s*[^<]*\s*<\/multi>", r"\1", cleaned
)
# cleaned = re.sub(r"<[^\>]*>", r"", cleaned)
return cleaned
class metadata:
def __init__(self, article): def __init__(self, article):
self.id = article.id_article self.id = article.id_article
# self.surtitle = article.surtitre # Probably unused # self.surtitle = article.surtitre # Probably unused
self.title = clean(article.titre) # self.title = self.clean(article.titre)
self.title = convert(article.titre)
self.subtitle = article.soustitre # Probably unused self.subtitle = article.soustitre # Probably unused
# self.section = article.id_rubrique # TODO join # self.section = article.id_rubrique # TODO join
self.description = article.descriptif self.description = convert(article.descriptif)
self.caption = article.chapo # Probably unused self.caption = article.chapo # Probably unused
self.ps = article.ps # Probably unused self.ps = article.ps # Probably unused
self.publicationDate = article.date self.publicationDate = article.date

View File

@ -1,124 +0,0 @@
import re
from os import path
class content:
    """Convert a SPIP markup string to Markdown with regex substitutions.

    The substitutions are applied in the insertion order of ``_mappings``,
    so the order of the entries is significant.
    """

    # name -> (compiled SPIP pattern, Markdown replacement)
    _mappings = {
        # Four dashes (each optionally followed by a space) or an <hr> tag
        "horizontal-rule": (
            re.compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", re.S | re.I),
            r"---",
        ),
        # A line holding only "_" (plus spaces), or a <br> tag
        "line-break": (
            re.compile(r"\r?\n_ *(?=\r?\n)|<br ?.*?>", re.S | re.I),
            "\n",
        ),
        # Triple braces must be handled before double and single braces,
        # otherwise the shorter patterns would consume part of them
        "heading": (
            re.compile(r"\{\{\{ *(.*?) *\}\}\}", re.S | re.I),
            r"## \1",
        ),
        "strong": (
            re.compile(r"\{\{ *(.*?) *\}\}", re.S | re.I),
            r"**\1**",
        ),
        "emphasis": (
            re.compile(r"\{ *(.*?) *\}", re.S | re.I),
            r"*\1*",
        ),
        # <del> is closed either by </del> or by a blank line
        "strikethrough": (
            re.compile(
                r"<del>\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)",
                re.S | re.I,
            ),
            r"~\1~",
        ),
        # [text->target]
        "anchor": (
            re.compile(r"\[ *(.*?) *-> *(.*?) *\]", re.S | re.I),
            r"[\1](\2)",
        ),
        # <imgXX|options> / <imageXX|options>: only the part before "|" is kept
        "image": (
            re.compile(r"<(?:img|image)(.*?)(\|.*?)*>", re.S | re.I),
            r"![image](\1)",
        ),
        # <docXX|options> / <embXX|options>
        "document-anchors": (
            re.compile(r"<(?:doc|emb)(.*?)(\|.*?)*>", re.S | re.I),
            r"[document](\1)",
        ),
        # [?Term]
        "wikilink": (
            re.compile(r"\[\? *(.*?) *\]", re.S | re.I),
            r"[\1](https://wikipedia.org/wiki/\1)",
        ),
        # [[...]] is removed entirely
        "footnote": (
            re.compile(r"\[\[ *(.*?) *\]\]", re.S | re.I),
            r"",
        ),
        # "-" or "-*" at the start of a line, but not "-#" or "--"
        "unordered-list": (
            re.compile(r"(\r?\n)-(?!#|-)\*? *", re.S | re.I),
            r"\1- ",
        ),
        # "* " at the start of a line
        "wrong-unordered-list": (
            re.compile(r"(\r?\n)\* +", re.S | re.I),
            r"\1- ",
        ),
        # "-#" at the start of a line
        "ordered-list": (
            re.compile(r"(\r?\n)-# *", re.S | re.I),
            r"\g<1>1. ",
        ),
        # "||...|...||" at the start of a line is removed entirely
        "table-metadata": (
            re.compile(r"(\r?\n)\|\|(.*?)\|(.*?)\|\|", re.S | re.I),
            r"",
        ),
        # <quote>/<poesie> is closed either by its end tag or by a blank line
        "quote": (
            re.compile(
                r"<(?:quote|poesie)>\s*(.*?)\s*(?:(\r?\n){2,}|<\/(?:quote|poesie)>)",
                re.S | re.I,
            ),
            r"> \1\2\2",
        ),
        # <code> becomes an inline code span
        "box": (
            re.compile(
                r"<code>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/code>)",
                re.S | re.I,
            ),
            "`\\1`",
        ),
        # <cadre> becomes a fenced code block
        "fence": (
            re.compile(
                r"<cadre>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/cadre>)",
                re.S | re.I,
            ),
            "```\n\\1\n\n```",
        ),
        "multi-language": (  # Keep only the first language
            re.compile(
                # The trailing ".*?" must stay lazy: with re.S a greedy ".*"
                # runs up to the LAST </multi> of the text and swallows
                # everything between two separate <multi> blocks.
                r"<multi>\s*\[.{2,4}\]\s*(.*?)\s*(?:\s*\[.{2,4}\].*?)*<\/multi>",
                re.S | re.I,
            ),
            r"\1",
        ),
    }

    def __init__(self, spip):
        """Store the raw SPIP markup string to convert."""
        self.markup = spip

    def get_markdown(self):
        """Apply every mapping to ``self.markup`` and return the result.

        ``self.markup`` is overwritten with the converted text, so calling
        this method again re-applies the patterns to already converted text.
        """
        for spip, markdown in self._mappings.values():
            self.markup = spip.sub(markdown, self.markup)
        # The UTF-8 round-trip leaves encodable text unchanged
        return self.markup.encode("utf-8").decode("utf-8")
# Converts a file & displays the resulting Markdown
def test(filename):
    """Print the Markdown conversion of a SPIP file.

    filename is resolved relative to the directory of this module.
    """
    # Context manager so the file handle is closed once the text is read
    with open(path.dirname(__file__) + "/" + filename) as spip_file:
        raw = spip_file.read()
    print(f"--- Conversion of {filename} ---\n\n")
    c = content(raw)
    print(c.get_markdown())
if __name__ == "__main__":
    # Run the conversion over the sample files 0.spip to 4.spip, in order
    for sample_index in range(5):
        test(f"../test/{sample_index}.spip")

101
spip2md/convert.py Normal file
View File

@ -0,0 +1,101 @@
import re
# SPIP -> Markdown substitutions: name -> (compiled pattern, replacement).
# They are applied in insertion order, so the order of the entries matters.
mappings = {
    # Four dashes (each optionally followed by a space) or an <hr> tag
    "horizontal-rule": (
        re.compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", re.S | re.I),
        r"---",
    ),
    # A line holding only "_" (plus spaces), or a <br> tag
    "line-break": (
        re.compile(r"\r?\n_ *(?=\r?\n)|<br ?.*?>", re.S | re.I),
        "\n",
    ),
    # Triple braces must be handled before double and single braces,
    # otherwise the shorter patterns would consume part of them
    "heading": (
        re.compile(r"\{\{\{ *(.*?) *\}\}\}", re.S | re.I),
        r"## \1",
    ),
    "strong": (
        re.compile(r"\{\{ *(.*?) *\}\}", re.S | re.I),
        r"**\1**",
    ),
    "emphasis": (
        re.compile(r"\{ *(.*?) *\}", re.S | re.I),
        r"*\1*",
    ),
    # <del> is closed either by </del> or by a blank line
    "strikethrough": (
        re.compile(
            r"<del>\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)",
            re.S | re.I,
        ),
        r"~\1~",
    ),
    # [text->target]
    "anchor": (
        re.compile(r"\[ *(.*?) *-> *(.*?) *\]", re.S | re.I),
        r"[\1](\2)",
    ),
    # <imgXX|options> / <imageXX|options>: only the part before "|" is kept
    "image": (
        re.compile(r"<(?:img|image)(.*?)(\|.*?)*>", re.S | re.I),
        r"![image](\1)",
    ),
    # <docXX|options> / <embXX|options>
    "document-anchors": (
        re.compile(r"<(?:doc|emb)(.*?)(\|.*?)*>", re.S | re.I),
        r"[document](\1)",
    ),
    # [?Term]
    "wikilink": (
        re.compile(r"\[\? *(.*?) *\]", re.S | re.I),
        r"[\1](https://wikipedia.org/wiki/\1)",
    ),
    # [[...]] is removed entirely
    "footnote": (
        re.compile(r"\[\[ *(.*?) *\]\]", re.S | re.I),
        r"",
    ),
    # "-" or "-*" at the start of a line, but not "-#" or "--"
    "unordered-list": (
        re.compile(r"(\r?\n)-(?!#|-)\*? *", re.S | re.I),
        r"\1- ",
    ),
    # "* " at the start of a line
    "wrong-unordered-list": (
        re.compile(r"(\r?\n)\* +", re.S | re.I),
        r"\1- ",
    ),
    # "-#" at the start of a line
    "ordered-list": (
        re.compile(r"(\r?\n)-# *", re.S | re.I),
        r"\g<1>1. ",
    ),
    # "||...|...||" at the start of a line is removed entirely
    "table-metadata": (
        re.compile(r"(\r?\n)\|\|(.*?)\|(.*?)\|\|", re.S | re.I),
        r"",
    ),
    # <quote>/<poesie> is closed either by its end tag or by a blank line
    "quote": (
        re.compile(
            r"<(?:quote|poesie)>\s*(.*?)\s*(?:(\r?\n){2,}|<\/(?:quote|poesie)>)",
            re.S | re.I,
        ),
        r"> \1\2\2",
    ),
    # <code> becomes an inline code span
    "box": (
        re.compile(
            r"<code>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/code>)",
            re.S | re.I,
        ),
        "`\\1`",
    ),
    # <cadre> becomes a fenced code block
    "fence": (
        re.compile(
            r"<cadre>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/cadre>)",
            re.S | re.I,
        ),
        "```\n\\1\n\n```",
    ),
    "multi-language": (  # Keep only the first language
        re.compile(
            # The trailing ".*?" must stay lazy: with re.S a greedy ".*"
            # runs up to the LAST </multi> of the text and swallows
            # everything between two separate <multi> blocks.
            r"<multi>\s*\[.{2,4}\]\s*(.*?)\s*(?:\s*\[.{2,4}\].*?)*<\/multi>",
            re.S | re.I,
        ),
        r"\1",
    ),
}
def convert(markup):
    """Return the Markdown version of the SPIP string markup.

    Every (pattern, replacement) pair of ``mappings`` is applied in turn,
    each substitution working on the output of the previous one.
    """
    converted = markup
    for pattern, replacement in mappings.values():
        converted = re.sub(pattern, replacement, converted)
    # Round-trip the final text through UTF-8 before handing it back
    utf8_bytes = converted.encode("utf-8")
    return utf8_bytes.decode("utf-8")

View File

@ -4,8 +4,8 @@ from os import mkdir
from shutil import rmtree from shutil import rmtree
from config import CONFIG from config import CONFIG
from content import content from convert import convert
from metadata import metadata from Metadata import Metadata
from SpipDatabase import * from SpipDatabase import *
# Clean the output dir & create a new # Clean the output dir & create a new
@ -39,12 +39,11 @@ for exported in range(nbToExport):
if exported > 0 and exported % 10 == 0: if exported > 0 and exported % 10 == 0:
print(f"\n--- {nbToExport - exported} articles remaining ---\n") print(f"\n--- {nbToExport - exported} articles remaining ---\n")
article = articles[exported] article = articles[exported]
meta = metadata(article) meta = Metadata(article)
print(f"{exported+1}. Exporting {meta.title}") print(f"{exported+1}. Exporting {meta.title}")
print(f" to {meta.get_slug()}/index.md") print(f" to {meta.get_slug()}/index.md")
articleDir = "{}/{}".format(CONFIG["outputDir"], meta.get_slug()) articleDir = "{}/{}".format(CONFIG["outputDir"], meta.get_slug())
body = content(article.texte)
mkdir(articleDir) mkdir(articleDir)
with open("{}/index.md".format(articleDir), "w") as f: with open("{}/index.md".format(articleDir), "w") as f:
@ -52,7 +51,7 @@ for exported in range(nbToExport):
"{}\n\n{}\n{}\n{}".format( "{}\n\n{}\n{}\n{}".format(
meta.get_frontmatter(), meta.get_frontmatter(),
meta.get_starting(), meta.get_starting(),
body.get_markdown(), convert(article.texte),
meta.get_ending(), meta.get_ending(),
) )
) )