simplified architecture
This commit is contained in:
parent
5e86ed0ed5
commit
723a7ddeea
@ -1,28 +1,18 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
|
from convert import convert
|
||||||
from slugify import slugify
|
from slugify import slugify
|
||||||
from SpipDatabase import *
|
from SpipDatabase import *
|
||||||
|
|
||||||
|
|
||||||
def clean(string):
|
class Metadata:
|
||||||
cleaned = string
|
|
||||||
# Keep only the first lang of multilingual titles
|
|
||||||
cleaned = re.sub(
|
|
||||||
r"<multi>\s*\[[a-z]{2}\]\s*([^<\[]*)\s*[^<]*\s*<\/multi>", r"\1", cleaned
|
|
||||||
)
|
|
||||||
# cleaned = re.sub(r"<[^\>]*>", r"", cleaned)
|
|
||||||
return cleaned
|
|
||||||
|
|
||||||
|
|
||||||
class metadata:
|
|
||||||
def __init__(self, article):
|
def __init__(self, article):
|
||||||
self.id = article.id_article
|
self.id = article.id_article
|
||||||
# self.surtitle = article.surtitre # Probably unused
|
# self.surtitle = article.surtitre # Probably unused
|
||||||
self.title = clean(article.titre)
|
# self.title = self.clean(article.titre)
|
||||||
|
self.title = convert(article.titre)
|
||||||
self.subtitle = article.soustitre # Probably unused
|
self.subtitle = article.soustitre # Probably unused
|
||||||
# self.section = article.id_rubrique # TODO join
|
# self.section = article.id_rubrique # TODO join
|
||||||
self.description = article.descriptif
|
self.description = convert(article.descriptif)
|
||||||
self.caption = article.chapo # Probably unused
|
self.caption = article.chapo # Probably unused
|
||||||
self.ps = article.ps # Probably unused
|
self.ps = article.ps # Probably unused
|
||||||
self.publicationDate = article.date
|
self.publicationDate = article.date
|
@ -1,124 +0,0 @@
|
|||||||
import re
|
|
||||||
from os import path
|
|
||||||
|
|
||||||
|
|
||||||
class content:
    """Hold a piece of SPIP markup and convert it to Markdown on demand.

    The conversion is a chain of regex substitutions stored in `_mappings`.
    """

    # Ordered rewrite rules: name -> (compiled SPIP pattern, replacement).
    # Rules run in insertion order and each one sees the output of the
    # previous ones, so the order below is significant:
    #   * "heading" ({{{ }}}) comes before "strong" ({{ }}), which comes before
    #     "emphasis" ({ }), because the shorter brace patterns would otherwise
    #     match inside the longer ones.
    #   * "anchor" and "wikilink" run before "footnote".
    _mappings = {
        # Four or more dashes (optionally space-separated) or an <hr> tag.
        "horizontal-rule": (
            re.compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", re.S | re.I),
            r"---",
        ),
        # A line holding only "_" (plus spaces), or a <br> tag.
        "line-break": (
            re.compile(r"\r?\n_ *(?=\r?\n)|<br ?.*?>", re.S | re.I),
            "\n",
        ),
        "heading": (
            re.compile(r"\{\{\{ *(.*?) *\}\}\}", re.S | re.I),
            r"## \1",
        ),
        "strong": (
            re.compile(r"\{\{ *(.*?) *\}\}", re.S | re.I),
            r"**\1**",
        ),
        "emphasis": (
            re.compile(r"\{ *(.*?) *\}", re.S | re.I),
            r"*\1*",
        ),
        # Ends at </del> or at the first blank line, whichever comes first.
        "strikethrough": (
            re.compile(
                r"<del>\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)",
                re.S | re.I,
            ),
            r"~\1~",
        ),
        "anchor": (
            re.compile(r"\[ *(.*?) *-> *(.*?) *\]", re.S | re.I),
            r"[\1](\2)",
        ),
        # Only the text before the first "|" option is kept as the target.
        "image": (
            re.compile(r"<(?:img|image)(.*?)(\|.*?)*>", re.S | re.I),
            r"![image](\1)",
        ),
        "document-anchors": (
            re.compile(r"<(?:doc|emb)(.*?)(\|.*?)*>", re.S | re.I),
            r"[document](\1)",
        ),
        "wikilink": (
            re.compile(r"\[\? *(.*?) *\]", re.S | re.I),
            r"[\1](https://wikipedia.org/wiki/\1)",
        ),
        # Footnotes are dropped entirely (empty replacement).
        "footnote": (
            re.compile(r"\[\[ *(.*?) *\]\]", re.S | re.I),
            r"",
        ),
        "unordered-list": (
            re.compile(r"(\r?\n)-(?!#|-)\*? *", re.S | re.I),
            r"\1- ",
        ),
        "wrong-unordered-list": (
            re.compile(r"(\r?\n)\* +", re.S | re.I),
            r"\1- ",
        ),
        # \g<1> is required here: "\11" would be read as group 11.
        "ordered-list": (
            re.compile(r"(\r?\n)-# *", re.S | re.I),
            r"\g<1>1. ",
        ),
        # Table caption/summary rows ("||caption|summary||") are removed.
        "table-metadata": (
            re.compile(r"(\r?\n)\|\|(.*?)\|(.*?)\|\|", re.S | re.I),
            r"",
        ),
        # Group 2 is only set when the quote ended on a blank line; it is then
        # re-emitted twice to restore that blank line after the quote.
        "quote": (
            re.compile(
                r"<(?:quote|poesie)>\s*(.*?)\s*(?:(\r?\n){2,}|<\/(?:quote|poesie)>)",
                re.S | re.I,
            ),
            r"> \1\2\2",
        ),
        "box": (
            re.compile(
                r"<code>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/code>)",
                re.S | re.I,
            ),
            "`\\1`",
        ),
        "fence": (
            re.compile(
                r"<cadre>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/cadre>)",
                re.S | re.I,
            ),
            "```\n\\1\n\n```",
        ),
        "multi-language": (  # Keep only the first language
            re.compile(
                r"<multi>\s*\[.{2,4}\]\s*(.*?)\s*(?:\s*\[.{2,4}\].*)*<\/multi>",
                re.S | re.I,
            ),
            r"\1",
        ),
    }

    def __init__(self, spip):
        """Store the raw SPIP markup (a string, or None for a missing text)."""
        self.markup = spip

    def get_markdown(self):
        """Apply every rule of `_mappings` in order and return the result.

        The converted text is also stored back into `self.markup`, as before.
        An empty or None markup yields an empty string instead of raising a
        TypeError inside `re`.
        """
        if not self.markup:
            return ""
        for spip, markdown in self._mappings.values():
            self.markup = spip.sub(markdown, self.markup)
        # The former encode("utf-8").decode("utf-8") round trip returned an
        # equal string, so the text is returned directly.
        return self.markup
|
|
||||||
|
|
||||||
|
|
||||||
def test(filename):
    """Convert one SPIP sample file and print the resulting Markdown.

    `filename` is resolved relative to the directory containing this module.
    The output is a banner line followed by the converted text.
    """
    # path.join avoids producing a root-anchored "/..." path when
    # dirname(__file__) is empty.
    sample = path.join(path.dirname(__file__), filename)
    # The context manager closes the handle, which the bare open().read() did not.
    with open(sample) as source:
        raw = source.read()

    print(f"--- Conversion of {filename} ---\n\n")
    print(content(raw).get_markdown())
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Manual smoke test: convert each sample file in order and print it.
    for sample in (
        "../test/0.spip",
        "../test/1.spip",
        "../test/2.spip",
        "../test/3.spip",
        "../test/4.spip",
    ):
        test(sample)
|
|
101
spip2md/convert.py
Normal file
101
spip2md/convert.py
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
# Ordered rewrite rules: name -> (compiled SPIP pattern, replacement).
# convert() applies them in insertion order and each rule sees the output of
# the previous ones, so the order below is significant:
#   * "heading" ({{{ }}}) comes before "strong" ({{ }}), which comes before
#     "emphasis" ({ }), because the shorter brace patterns would otherwise
#     match inside the longer ones.
#   * "anchor" and "wikilink" run before "footnote".
mappings = {
    # Four or more dashes (optionally space-separated) or an <hr> tag.
    "horizontal-rule": (
        re.compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", re.S | re.I),
        r"---",
    ),
    # A line holding only "_" (plus spaces), or a <br> tag.
    "line-break": (
        re.compile(r"\r?\n_ *(?=\r?\n)|<br ?.*?>", re.S | re.I),
        "\n",
    ),
    "heading": (
        re.compile(r"\{\{\{ *(.*?) *\}\}\}", re.S | re.I),
        r"## \1",
    ),
    "strong": (
        re.compile(r"\{\{ *(.*?) *\}\}", re.S | re.I),
        r"**\1**",
    ),
    "emphasis": (
        re.compile(r"\{ *(.*?) *\}", re.S | re.I),
        r"*\1*",
    ),
    # Ends at </del> or at the first blank line, whichever comes first.
    "strikethrough": (
        re.compile(
            r"<del>\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)",
            re.S | re.I,
        ),
        r"~\1~",
    ),
    "anchor": (
        re.compile(r"\[ *(.*?) *-> *(.*?) *\]", re.S | re.I),
        r"[\1](\2)",
    ),
    # Only the text before the first "|" option is kept as the target.
    "image": (
        re.compile(r"<(?:img|image)(.*?)(\|.*?)*>", re.S | re.I),
        r"![image](\1)",
    ),
    "document-anchors": (
        re.compile(r"<(?:doc|emb)(.*?)(\|.*?)*>", re.S | re.I),
        r"[document](\1)",
    ),
    "wikilink": (
        re.compile(r"\[\? *(.*?) *\]", re.S | re.I),
        r"[\1](https://wikipedia.org/wiki/\1)",
    ),
    # Footnotes are dropped entirely (empty replacement).
    "footnote": (
        re.compile(r"\[\[ *(.*?) *\]\]", re.S | re.I),
        r"",
    ),
    "unordered-list": (
        re.compile(r"(\r?\n)-(?!#|-)\*? *", re.S | re.I),
        r"\1- ",
    ),
    "wrong-unordered-list": (
        re.compile(r"(\r?\n)\* +", re.S | re.I),
        r"\1- ",
    ),
    # \g<1> is required here: "\11" would be read as group 11.
    "ordered-list": (
        re.compile(r"(\r?\n)-# *", re.S | re.I),
        r"\g<1>1. ",
    ),
    # Table caption/summary rows ("||caption|summary||") are removed.
    "table-metadata": (
        re.compile(r"(\r?\n)\|\|(.*?)\|(.*?)\|\|", re.S | re.I),
        r"",
    ),
    # Group 2 is only set when the quote ended on a blank line; it is then
    # re-emitted twice to restore that blank line after the quote.
    "quote": (
        re.compile(
            r"<(?:quote|poesie)>\s*(.*?)\s*(?:(\r?\n){2,}|<\/(?:quote|poesie)>)",
            re.S | re.I,
        ),
        r"> \1\2\2",
    ),
    "box": (
        re.compile(
            r"<code>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/code>)",
            re.S | re.I,
        ),
        "`\\1`",
    ),
    "fence": (
        re.compile(
            r"<cadre>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/cadre>)",
            re.S | re.I,
        ),
        "```\n\\1\n\n```",
    ),
    "multi-language": (  # Keep only the first language
        re.compile(
            r"<multi>\s*\[.{2,4}\]\s*(.*?)\s*(?:\s*\[.{2,4}\].*)*<\/multi>",
            re.S | re.I,
        ),
        r"\1",
    ),
}


def convert(markup):
    """Return `markup` (SPIP syntax) rewritten as Markdown.

    Every rule of `mappings` is applied once, in insertion order.

    An empty or None `markup` yields an empty string, so callers can pass
    optional fields without hitting a TypeError inside `re`.
    """
    if not markup:
        return ""
    for spip, markdown in mappings.values():
        markup = spip.sub(markdown, markup)
    # The former encode("utf-8").decode("utf-8") round trip returned an equal
    # string, so the text is returned directly.
    return markup
|
@ -4,8 +4,8 @@ from os import mkdir
|
|||||||
from shutil import rmtree
|
from shutil import rmtree
|
||||||
|
|
||||||
from config import CONFIG
|
from config import CONFIG
|
||||||
from content import content
|
from convert import convert
|
||||||
from metadata import metadata
|
from Metadata import Metadata
|
||||||
from SpipDatabase import *
|
from SpipDatabase import *
|
||||||
|
|
||||||
# Clean the output dir & create a new
|
# Clean the output dir & create a new
|
||||||
@ -39,12 +39,11 @@ for exported in range(nbToExport):
|
|||||||
if exported > 0 and exported % 10 == 0:
|
if exported > 0 and exported % 10 == 0:
|
||||||
print(f"\n--- {nbToExport - exported} articles remaining ---\n")
|
print(f"\n--- {nbToExport - exported} articles remaining ---\n")
|
||||||
article = articles[exported]
|
article = articles[exported]
|
||||||
meta = metadata(article)
|
meta = Metadata(article)
|
||||||
|
|
||||||
print(f"{exported+1}. Exporting {meta.title}")
|
print(f"{exported+1}. Exporting {meta.title}")
|
||||||
print(f" to {meta.get_slug()}/index.md")
|
print(f" to {meta.get_slug()}/index.md")
|
||||||
articleDir = "{}/{}".format(CONFIG["outputDir"], meta.get_slug())
|
articleDir = "{}/{}".format(CONFIG["outputDir"], meta.get_slug())
|
||||||
body = content(article.texte)
|
|
||||||
|
|
||||||
mkdir(articleDir)
|
mkdir(articleDir)
|
||||||
with open("{}/index.md".format(articleDir), "w") as f:
|
with open("{}/index.md".format(articleDir), "w") as f:
|
||||||
@ -52,7 +51,7 @@ for exported in range(nbToExport):
|
|||||||
"{}\n\n{}\n{}\n{}".format(
|
"{}\n\n{}\n{}\n{}".format(
|
||||||
meta.get_frontmatter(),
|
meta.get_frontmatter(),
|
||||||
meta.get_starting(),
|
meta.get_starting(),
|
||||||
body.get_markdown(),
|
convert(article.texte),
|
||||||
meta.get_ending(),
|
meta.get_ending(),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user