simplified architecture
This commit is contained in:
parent
5e86ed0ed5
commit
723a7ddeea
@ -1,28 +1,18 @@
|
||||
import re
|
||||
|
||||
import yaml
|
||||
from convert import convert
|
||||
from slugify import slugify
|
||||
from SpipDatabase import *
|
||||
|
||||
|
||||
def clean(string):
    """Reduce SPIP ``<multi>`` blocks to their first language variant.

    A block such as ``<multi>[fr]Titre[en]Title</multi>`` is replaced by the
    text that follows the first two-letter ``[xx]`` marker (``Titre``). Text
    outside ``<multi>`` blocks is returned untouched.

    Args:
        string: Text that may contain ``<multi>...</multi>`` blocks.

    Returns:
        The text with every matching block replaced by its first variant,
        without the whitespace that separated it from the next marker.
    """
    # The capture is lazy so the following \s* can absorb the whitespace that
    # precedes the next "[xx]" marker; a greedy capture would keep it and
    # yield e.g. "Titre " with a trailing space.
    return re.sub(
        r"<multi>\s*\[[a-z]{2}\]\s*([^<\[]*?)\s*(?:\[[^<]*)?<\/multi>",
        r"\1",
        string,
    )
|
||||
|
||||
|
||||
class metadata:
|
||||
class Metadata:
|
||||
def __init__(self, article):
|
||||
self.id = article.id_article
|
||||
# self.surtitle = article.surtitre # Probably unused
|
||||
self.title = clean(article.titre)
|
||||
# self.title = self.clean(article.titre)
|
||||
self.title = convert(article.titre)
|
||||
self.subtitle = article.soustitre # Probably unused
|
||||
# self.section = article.id_rubrique # TODO join
|
||||
self.description = article.descriptif
|
||||
self.description = convert(article.descriptif)
|
||||
self.caption = article.chapo # Probably unused
|
||||
self.ps = article.ps # Probably unused
|
||||
self.publicationDate = article.date
|
@ -1,124 +0,0 @@
|
||||
import re
|
||||
from os import path
|
||||
|
||||
|
||||
class content:
    """Regex-based converter from SPIP markup to Markdown.

    ``_mappings`` associates a rule name with a ``(compiled pattern,
    replacement)`` pair. ``get_markdown`` applies the rules in the order they
    are written here, and that order matters: ``heading`` (three braces) has
    to run before ``strong`` (two braces), which has to run before
    ``emphasis`` (one brace), otherwise the shorter pattern would consume the
    longer construct.
    """

    _mappings = {
        "horizontal-rule": (
            re.compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", re.S | re.I),
            r"---",
        ),
        "line-break": (
            re.compile(r"\r?\n_ *(?=\r?\n)|<br ?.*?>", re.S | re.I),
            "\n",
        ),
        "heading": (
            re.compile(r"\{\{\{ *(.*?) *\}\}\}", re.S | re.I),
            r"## \1",
        ),
        "strong": (
            re.compile(r"\{\{ *(.*?) *\}\}", re.S | re.I),
            r"**\1**",
        ),
        "emphasis": (
            re.compile(r"\{ *(.*?) *\}", re.S | re.I),
            r"*\1*",
        ),
        "strikethrough": (
            re.compile(
                r"<del>\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)",
                re.S | re.I,
            ),
            r"~\1~",
        ),
        "anchor": (
            re.compile(r"\[ *(.*?) *-> *(.*?) *\]", re.S | re.I),
            r"[\1](\2)",
        ),
        "image": (
            re.compile(r"<(?:img|image)(.*?)(\|.*?)*>", re.S | re.I),
            r"![image](\1)",
        ),
        "document-anchors": (
            re.compile(r"<(?:doc|emb)(.*?)(\|.*?)*>", re.S | re.I),
            r"[document](\1)",
        ),
        "wikilink": (
            re.compile(r"\[\? *(.*?) *\]", re.S | re.I),
            r"[\1](https://wikipedia.org/wiki/\1)",
        ),
        # Footnotes are removed from the output
        "footnote": (
            re.compile(r"\[\[ *(.*?) *\]\]", re.S | re.I),
            r"",
        ),
        "unordered-list": (
            re.compile(r"(\r?\n)-(?!#|-)\*? *", re.S | re.I),
            r"\1- ",
        ),
        "wrong-unordered-list": (
            re.compile(r"(\r?\n)\* +", re.S | re.I),
            r"\1- ",
        ),
        "ordered-list": (
            re.compile(r"(\r?\n)-# *", re.S | re.I),
            # \g<1> because "\11" would be read as a reference to group 11
            r"\g<1>1. ",
        ),
        # Table metadata rows are removed from the output
        "table-metadata": (
            re.compile(r"(\r?\n)\|\|(.*?)\|(.*?)\|\|", re.S | re.I),
            r"",
        ),
        "quote": (
            re.compile(
                r"<(?:quote|poesie)>\s*(.*?)\s*(?:(\r?\n){2,}|<\/(?:quote|poesie)>)",
                re.S | re.I,
            ),
            r"> \1\2\2",
        ),
        "box": (
            re.compile(
                r"<code>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/code>)",
                re.S | re.I,
            ),
            "`\\1`",
        ),
        "fence": (
            re.compile(
                r"<cadre>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/cadre>)",
                re.S | re.I,
            ),
            "```\n\\1\n\n```",
        ),
        "multi-language": (  # Keep only the first language
            re.compile(
                # The tail after the first variant is lazy and optional: a
                # greedy ".*" under re.S would run to the LAST </multi> of
                # the text and delete everything between two <multi> blocks.
                r"<multi>\s*\[.{2,4}\]\s*(.*?)\s*(?:\[.{2,4}\].*?)?<\/multi>",
                re.S | re.I,
            ),
            r"\1",
        ),
    }

    def __init__(self, spip):
        """Store the SPIP markup to convert.

        Args:
            spip: SPIP markup as a string.
        """
        self.markup = spip

    def get_markdown(self):
        """Apply every rule of ``_mappings`` in order and return the result.

        ``self.markup`` is overwritten with the converted text as a side
        effect, so it no longer holds the raw SPIP markup afterwards.

        Returns:
            The converted Markdown string.
        """
        for pattern, replacement in self._mappings.values():
            self.markup = pattern.sub(replacement, self.markup)
        # The former encode("utf-8").decode("utf-8") round trip returned the
        # same string unchanged, so it has been dropped.
        return self.markup
|
||||
|
||||
|
||||
# Converts a sample SPIP file to Markdown & prints the result
def test(filename):
    """Convert one SPIP sample file and print the resulting Markdown.

    Args:
        filename: Path of the sample, relative to the directory that
            contains this module.
    """
    # path.join instead of dirname + "/" + filename: when dirname(__file__)
    # is empty (e.g. __file__ is a bare relative file name) the concatenation
    # produced "/<filename>", i.e. a path anchored at the filesystem root.
    sample = path.join(path.dirname(__file__), filename)
    # Context manager so the file handle is closed once it has been read
    with open(sample) as f:
        raw = f.read()

    print(f"--- Conversion of {filename} ---\n\n")
    c = content(raw)
    print(c.get_markdown())
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: convert each bundled sample, in order
    for sample in (
        "../test/0.spip",
        "../test/1.spip",
        "../test/2.spip",
        "../test/3.spip",
        "../test/4.spip",
    ):
        test(sample)
|
101
spip2md/convert.py
Normal file
101
spip2md/convert.py
Normal file
@ -0,0 +1,101 @@
|
||||
import re
|
||||
|
||||
# SPIP -> Markdown rewrite rules: rule name -> (compiled pattern, replacement).
# The rules are meant to be applied in the order written here, and the order
# matters: "heading" (three braces) must run before "strong" (two braces),
# which must run before "emphasis" (one brace), otherwise the shorter pattern
# would consume the longer construct.
mappings = {
    "horizontal-rule": (
        re.compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", re.S | re.I),
        r"---",
    ),
    "line-break": (
        re.compile(r"\r?\n_ *(?=\r?\n)|<br ?.*?>", re.S | re.I),
        "\n",
    ),
    "heading": (
        re.compile(r"\{\{\{ *(.*?) *\}\}\}", re.S | re.I),
        r"## \1",
    ),
    "strong": (
        re.compile(r"\{\{ *(.*?) *\}\}", re.S | re.I),
        r"**\1**",
    ),
    "emphasis": (
        re.compile(r"\{ *(.*?) *\}", re.S | re.I),
        r"*\1*",
    ),
    "strikethrough": (
        re.compile(
            r"<del>\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)",
            re.S | re.I,
        ),
        r"~\1~",
    ),
    "anchor": (
        re.compile(r"\[ *(.*?) *-> *(.*?) *\]", re.S | re.I),
        r"[\1](\2)",
    ),
    "image": (
        re.compile(r"<(?:img|image)(.*?)(\|.*?)*>", re.S | re.I),
        r"![image](\1)",
    ),
    "document-anchors": (
        re.compile(r"<(?:doc|emb)(.*?)(\|.*?)*>", re.S | re.I),
        r"[document](\1)",
    ),
    "wikilink": (
        re.compile(r"\[\? *(.*?) *\]", re.S | re.I),
        r"[\1](https://wikipedia.org/wiki/\1)",
    ),
    # Footnotes are removed from the output
    "footnote": (
        re.compile(r"\[\[ *(.*?) *\]\]", re.S | re.I),
        r"",
    ),
    "unordered-list": (
        re.compile(r"(\r?\n)-(?!#|-)\*? *", re.S | re.I),
        r"\1- ",
    ),
    "wrong-unordered-list": (
        re.compile(r"(\r?\n)\* +", re.S | re.I),
        r"\1- ",
    ),
    "ordered-list": (
        re.compile(r"(\r?\n)-# *", re.S | re.I),
        # \g<1> because "\11" would be read as a reference to group 11
        r"\g<1>1. ",
    ),
    # Table metadata rows are removed from the output
    "table-metadata": (
        re.compile(r"(\r?\n)\|\|(.*?)\|(.*?)\|\|", re.S | re.I),
        r"",
    ),
    "quote": (
        re.compile(
            r"<(?:quote|poesie)>\s*(.*?)\s*(?:(\r?\n){2,}|<\/(?:quote|poesie)>)",
            re.S | re.I,
        ),
        r"> \1\2\2",
    ),
    "box": (
        re.compile(
            r"<code>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/code>)",
            re.S | re.I,
        ),
        "`\\1`",
    ),
    "fence": (
        re.compile(
            r"<cadre>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/cadre>)",
            re.S | re.I,
        ),
        "```\n\\1\n\n```",
    ),
    "multi-language": (  # Keep only the first language
        re.compile(
            # The tail after the first variant is lazy and optional: a greedy
            # ".*" under re.S would run to the LAST </multi> of the text and
            # delete everything between two <multi> blocks.
            r"<multi>\s*\[.{2,4}\]\s*(.*?)\s*(?:\[.{2,4}\].*?)?<\/multi>",
            re.S | re.I,
        ),
        r"\1",
    ),
}
|
||||
|
||||
|
||||
def convert(markup):
    """Convert SPIP markup to Markdown.

    Every ``(pattern, replacement)`` pair of ``mappings`` is applied in turn,
    in the dict's insertion order, each one working on the output of the
    previous one.

    Args:
        markup: SPIP markup as a string.

    Returns:
        The converted Markdown string.
    """
    for pattern, replacement in mappings.values():
        markup = pattern.sub(replacement, markup)
    # The former encode("utf-8").decode("utf-8") round trip returned the same
    # string unchanged, so it has been dropped.
    return markup
|
@ -4,8 +4,8 @@ from os import mkdir
|
||||
from shutil import rmtree
|
||||
|
||||
from config import CONFIG
|
||||
from content import content
|
||||
from metadata import metadata
|
||||
from convert import convert
|
||||
from Metadata import Metadata
|
||||
from SpipDatabase import *
|
||||
|
||||
# Clean the output dir & create a new
|
||||
@ -39,12 +39,11 @@ for exported in range(nbToExport):
|
||||
if exported > 0 and exported % 10 == 0:
|
||||
print(f"\n--- {nbToExport - exported} articles remaining ---\n")
|
||||
article = articles[exported]
|
||||
meta = metadata(article)
|
||||
meta = Metadata(article)
|
||||
|
||||
print(f"{exported+1}. Exporting {meta.title}")
|
||||
print(f" to {meta.get_slug()}/index.md")
|
||||
articleDir = "{}/{}".format(CONFIG["outputDir"], meta.get_slug())
|
||||
body = content(article.texte)
|
||||
|
||||
mkdir(articleDir)
|
||||
with open("{}/index.md".format(articleDir), "w") as f:
|
||||
@ -52,7 +51,7 @@ for exported in range(nbToExport):
|
||||
"{}\n\n{}\n{}\n{}".format(
|
||||
meta.get_frontmatter(),
|
||||
meta.get_starting(),
|
||||
body.get_markdown(),
|
||||
convert(article.texte),
|
||||
meta.get_ending(),
|
||||
)
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user