refactor project structure
This commit is contained in:
parent
8eb0d1101a
commit
b8f99fb329
@ -1,7 +1,30 @@
|
|||||||
CONFIG = {
|
from yaml import CLoader as Loader
|
||||||
"db": "spip",
|
from yaml import load
|
||||||
"dbUser": "spip",
|
|
||||||
"dbPass": "password",
|
|
||||||
"outputDir": "output",
|
class Configuration:
|
||||||
"maxExportNb": 1000,
|
db = "spip"
|
||||||
}
|
dbHost = "localhost"
|
||||||
|
dbUser = "spip"
|
||||||
|
dbPass = "password"
|
||||||
|
outputDir = "output"
|
||||||
|
defaultNbToExport = 1000
|
||||||
|
|
||||||
|
def __init__(self, configFile=None) -> None:
|
||||||
|
if configFile != None:
|
||||||
|
with open(configFile) as f:
|
||||||
|
config = load(f.read(), Loader=Loader)
|
||||||
|
if "db" in config:
|
||||||
|
self.db = config["db"]
|
||||||
|
if "dbUser" in config:
|
||||||
|
self.dbUser = config["dbUser"]
|
||||||
|
if "dbPass" in config:
|
||||||
|
self.dbPass = config["dbPass"]
|
||||||
|
if "outputDir" in config:
|
||||||
|
self.outputDir = config["outputDir"]
|
||||||
|
if "defaultNbToExport" in config:
|
||||||
|
self.defaultNbToExport = config["defaultNbToExport"]
|
||||||
|
|
||||||
|
|
||||||
|
# config = Configuration("spip2md.yml")
|
||||||
|
config = Configuration()
|
||||||
|
@ -1,159 +1,159 @@
|
|||||||
import re
|
from re import I, S, compile
|
||||||
|
|
||||||
# SPIP syntax to Markdown
|
# SPIP syntax to Markdown
|
||||||
spipToMarkdown = (
|
spipToMarkdown = (
|
||||||
( # horizontal rule
|
( # horizontal rule
|
||||||
re.compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", re.S | re.I),
|
compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", S | I),
|
||||||
# r"---",
|
# r"---",
|
||||||
r"***",
|
r"***",
|
||||||
),
|
),
|
||||||
( # line break
|
( # line break
|
||||||
re.compile(r"\r?\n_ *(?=\r?\n)|<br ?.*?>", re.S | re.I),
|
compile(r"\r?\n_ *(?=\r?\n)|<br ?.*?>", S | I),
|
||||||
"\n",
|
"\n",
|
||||||
),
|
),
|
||||||
( # heading
|
( # heading
|
||||||
re.compile(r"\{\{\{ *(.*?) *\}\}\}", re.S | re.I),
|
compile(r"\{\{\{ *(.*?) *\}\}\}", S | I),
|
||||||
r"# \1",
|
r"# \1",
|
||||||
# r"## \1",
|
# r"## \1",
|
||||||
),
|
),
|
||||||
( # strong
|
( # strong
|
||||||
re.compile(r"\{\{ *(.*?) *\}\}", re.S | re.I),
|
compile(r"\{\{ *(.*?) *\}\}", S | I),
|
||||||
r"**\1**",
|
r"**\1**",
|
||||||
),
|
),
|
||||||
( # html strong
|
( # html strong
|
||||||
re.compile(r"<strong> *(.*?) *</strong>", re.S | re.I),
|
compile(r"<strong> *(.*?) *</strong>", S | I),
|
||||||
r"**\1**",
|
r"**\1**",
|
||||||
),
|
),
|
||||||
( # emphasis
|
( # emphasis
|
||||||
re.compile(r"\{ *(.*?) *\}", re.S | re.I),
|
compile(r"\{ *(.*?) *\}", S | I),
|
||||||
r"*\1*",
|
r"*\1*",
|
||||||
),
|
),
|
||||||
( # html emphasis
|
( # html emphasis
|
||||||
re.compile(r"<i> *(.*?) *<\/i>", re.S | re.I),
|
compile(r"<i> *(.*?) *<\/i>", S | I),
|
||||||
r"*\1*",
|
r"*\1*",
|
||||||
),
|
),
|
||||||
( # strikethrough
|
( # strikethrough
|
||||||
re.compile(
|
compile(
|
||||||
r"<del>\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)",
|
r"<del>\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)",
|
||||||
re.S | re.I,
|
S | I,
|
||||||
),
|
),
|
||||||
r"~\1~",
|
r"~\1~",
|
||||||
),
|
),
|
||||||
( # anchor
|
( # anchor
|
||||||
re.compile(r"\[ *(.*?) *-> *(.*?) *\]", re.S | re.I),
|
compile(r"\[ *(.*?) *-> *(.*?) *\]", S | I),
|
||||||
r"[\1](\2)",
|
r"[\1](\2)",
|
||||||
),
|
),
|
||||||
( # image
|
( # image
|
||||||
re.compile(r"<(?:img|image)(.*?)(\|.*?)*>", re.S | re.I),
|
compile(r"<(?:img|image)(.*?)(\|.*?)*>", S | I),
|
||||||
r"![image](\1)",
|
r"![image](\1)",
|
||||||
),
|
),
|
||||||
( # document anchor
|
( # document anchor
|
||||||
re.compile(r"<(?:doc|emb)(.*?)(\|.*?)*>", re.S | re.I),
|
compile(r"<(?:doc|emb)(.*?)(\|.*?)*>", S | I),
|
||||||
r"[document](\1)",
|
r"[document](\1)",
|
||||||
),
|
),
|
||||||
( # wikilink
|
( # wikilink
|
||||||
re.compile(r"\[\? *(.*?) *\]", re.S | re.I),
|
compile(r"\[\? *(.*?) *\]", S | I),
|
||||||
r"[\1](https://wikipedia.org/wiki/\1)",
|
r"[\1](https://wikipedia.org/wiki/\1)",
|
||||||
),
|
),
|
||||||
( # footnote
|
( # footnote
|
||||||
re.compile(r"\[\[ *(.*?) *\]\]", re.S | re.I),
|
compile(r"\[\[ *(.*?) *\]\]", S | I),
|
||||||
r"",
|
r"",
|
||||||
),
|
),
|
||||||
( # unordered list
|
( # unordered list
|
||||||
re.compile(r"(\r?\n)-(?!#|-)\*? *", re.S | re.I),
|
compile(r"(\r?\n)-(?!#|-)\*? *", S | I),
|
||||||
r"\1- ",
|
r"\1- ",
|
||||||
),
|
),
|
||||||
( # wrong unordered list
|
( # wrong unordered list
|
||||||
re.compile(r"(\r?\n)\* +", re.S | re.I),
|
compile(r"(\r?\n)\* +", S | I),
|
||||||
r"\1- ",
|
r"\1- ",
|
||||||
),
|
),
|
||||||
( # wrong unordered list WARNING suppresses preceding tag
|
( # wrong unordered list WARNING suppresses preceding tag
|
||||||
re.compile(r"(\r?\n)<.*?>\* +", re.I),
|
compile(r"(\r?\n)<.*?>\* +", I),
|
||||||
r"\1- ",
|
r"\1- ",
|
||||||
),
|
),
|
||||||
( # ordered-list
|
( # ordered-list
|
||||||
re.compile(r"(\r?\n)-# *", re.S | re.I),
|
compile(r"(\r?\n)-# *", S | I),
|
||||||
r"\g<1>1. ",
|
r"\g<1>1. ",
|
||||||
),
|
),
|
||||||
( # table-metadata
|
( # table-metadata
|
||||||
re.compile(r"(\r?\n)\|\|(.*?)\|(.*?)\|\|", re.S | re.I),
|
compile(r"(\r?\n)\|\|(.*?)\|(.*?)\|\|", S | I),
|
||||||
r"",
|
r"",
|
||||||
),
|
),
|
||||||
( # quote
|
( # quote
|
||||||
re.compile(
|
compile(
|
||||||
r"<(?:quote|poesie)>\s*(.*?)\s*(?:(\r?\n){2,}|<\/(?:quote|poesie)>)",
|
r"<(?:quote|poesie)>\s*(.*?)\s*(?:(\r?\n){2,}|<\/(?:quote|poesie)>)",
|
||||||
re.S | re.I,
|
S | I,
|
||||||
),
|
),
|
||||||
r"> \1\2\2",
|
r"> \1\2\2",
|
||||||
),
|
),
|
||||||
( # box
|
( # box
|
||||||
re.compile(
|
compile(
|
||||||
r"<code>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/code>)",
|
r"<code>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/code>)",
|
||||||
re.S | re.I,
|
S | I,
|
||||||
),
|
),
|
||||||
"`\\1`",
|
"`\\1`",
|
||||||
),
|
),
|
||||||
( # fence
|
( # fence
|
||||||
re.compile(
|
compile(
|
||||||
r"<cadre>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/cadre>)",
|
r"<cadre>\s*(.*?)\s*(?:(?:\r?\n){2,}|<\/cadre>)",
|
||||||
re.S | re.I,
|
S | I,
|
||||||
),
|
),
|
||||||
"```\n\\1\n\n```",
|
"```\n\\1\n\n```",
|
||||||
),
|
),
|
||||||
( # Keep only the first language in multi-language blocks
|
( # Keep only the first language in multi-language blocks
|
||||||
re.compile(
|
compile(
|
||||||
r"<multi>\s*(?:\[.{2,4}\])?\s*(.*?)\s*(?:\s*\[.{2,4}\].*)*<\/multi>",
|
r"<multi>\s*(?:\[.{2,4}\])?\s*(.*?)\s*(?:\s*\[.{2,4}\].*)*<\/multi>",
|
||||||
re.S | re.I,
|
S | I,
|
||||||
),
|
),
|
||||||
r"\1",
|
r"\1",
|
||||||
),
|
),
|
||||||
( # WARNING remove every html tag
|
( # WARNING remove every html tag
|
||||||
re.compile(r"<\/?.*?> *", re.S | re.I),
|
compile(r"<\/?.*?> *", S | I),
|
||||||
r"",
|
r"",
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
spipToMetadata = (
|
spipToText = (
|
||||||
( # strong
|
( # strong
|
||||||
re.compile(r"\{\{ *(.*?) *\}\}", re.S | re.I),
|
compile(r"\{\{ *(.*?) *\}\}", S | I),
|
||||||
r"\1",
|
r"\1",
|
||||||
),
|
),
|
||||||
( # html strong
|
( # html strong
|
||||||
re.compile(r"<strong> *(.*?) *</strong>", re.S | re.I),
|
compile(r"<strong> *(.*?) *</strong>", S | I),
|
||||||
r"\1",
|
r"\1",
|
||||||
),
|
),
|
||||||
( # emphasis
|
( # emphasis
|
||||||
re.compile(r"\{ *(.*?) *\}", re.S | re.I),
|
compile(r"\{ *(.*?) *\}", S | I),
|
||||||
r"\1",
|
r"\1",
|
||||||
),
|
),
|
||||||
( # html emphasis
|
( # html emphasis
|
||||||
re.compile(r"<i> *(.*?) *<\/i>", re.S | re.I),
|
compile(r"<i> *(.*?) *<\/i>", S | I),
|
||||||
r"\1",
|
r"\1",
|
||||||
),
|
),
|
||||||
( # strikethrough
|
( # strikethrough
|
||||||
re.compile(
|
compile(
|
||||||
r"<del>\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)",
|
r"<del>\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)",
|
||||||
re.S | re.I,
|
S | I,
|
||||||
),
|
),
|
||||||
r"\1",
|
r"\1",
|
||||||
),
|
),
|
||||||
( # Keep only the first language in multi-language blocks
|
( # Keep only the first language in multi-language blocks
|
||||||
re.compile(
|
compile(
|
||||||
r"<multi>\s*(?:\[.{2,4}\])?\s*(.*?)\s*(?:\s*\[.{2,4}\].*)*<\/multi>",
|
r"<multi>\s*(?:\[.{2,4}\])?\s*(.*?)\s*(?:\s*\[.{2,4}\].*)*<\/multi>",
|
||||||
re.S | re.I,
|
S | I,
|
||||||
),
|
),
|
||||||
r"\1",
|
r"\1",
|
||||||
),
|
),
|
||||||
( # remove every html tag
|
( # remove every html tag
|
||||||
re.compile(r"<\/?.*?> *", re.S | re.I),
|
compile(r"<\/?.*?> *", S | I),
|
||||||
r"",
|
r"",
|
||||||
),
|
),
|
||||||
( # beginning with angle bracket(s)
|
( # beginning with angle bracket(s)
|
||||||
re.compile(r"^>+ +", re.S | re.I),
|
compile(r"^>+ +", S | I),
|
||||||
r"",
|
r"",
|
||||||
),
|
),
|
||||||
( # beginning with a number followed by a dot
|
( # beginning with a number followed by a dot
|
||||||
re.compile(r"^\d+\. +", re.S | re.I),
|
compile(r"^\d+\. +", S | I),
|
||||||
r"",
|
r"",
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
@ -161,117 +161,119 @@ spipToMetadata = (
|
|||||||
isoToUtf = (
|
isoToUtf = (
|
||||||
# Broken encoding
|
# Broken encoding
|
||||||
( # Fix UTF-8 apostrophe that was interpreted as ISO 8859-1
|
( # Fix UTF-8 apostrophe that was interpreted as ISO 8859-1
|
||||||
re.compile("’"),
|
compile("’"),
|
||||||
r"’",
|
r"’",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 ‘ that was interpreted as ISO 8859-1
|
( # Fix UTF-8 ‘ that was interpreted as ISO 8859-1
|
||||||
re.compile("‘"),
|
compile("‘"),
|
||||||
r"‘",
|
r"‘",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 é that was interpreted as ISO 8859-1
|
( # Fix UTF-8 é that was interpreted as ISO 8859-1
|
||||||
re.compile("eÌ\u0081"),
|
compile("eÌ\u0081"),
|
||||||
r"é",
|
r"é",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 è that was interpreted as ISO 8859-1
|
( # Fix UTF-8 è that was interpreted as ISO 8859-1
|
||||||
re.compile("è"),
|
compile("è"),
|
||||||
r"è",
|
r"è",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 ê that was interpreted as ISO 8859-1
|
( # Fix UTF-8 ê that was interpreted as ISO 8859-1
|
||||||
re.compile("ê"),
|
compile("ê"),
|
||||||
r"ê",
|
r"ê",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 ô that was interpreted as ISO 8859-1
|
( # Fix UTF-8 ô that was interpreted as ISO 8859-1
|
||||||
re.compile("ô"),
|
compile("ô"),
|
||||||
r"ô",
|
r"ô",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 î that was interpreted as ISO 8859-1
|
( # Fix UTF-8 î that was interpreted as ISO 8859-1
|
||||||
re.compile("î"),
|
compile("î"),
|
||||||
r"î",
|
r"î",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 ï that was interpreted as ISO 8859-1
|
( # Fix UTF-8 ï that was interpreted as ISO 8859-1
|
||||||
re.compile("ï"),
|
compile("ï"),
|
||||||
r"ï",
|
r"ï",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 ö that was interpreted as ISO 8859-1
|
( # Fix UTF-8 ö that was interpreted as ISO 8859-1
|
||||||
re.compile("ö"),
|
compile("ö"),
|
||||||
r"ö",
|
r"ö",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 ü that was interpreted as ISO 8859-1
|
( # Fix UTF-8 ü that was interpreted as ISO 8859-1
|
||||||
re.compile("ü"),
|
compile("ü"),
|
||||||
r"ü",
|
r"ü",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 à that was interpreted as ISO 8859-1
|
( # Fix UTF-8 à that was interpreted as ISO 8859-1
|
||||||
re.compile("à"),
|
compile("à"),
|
||||||
r"à",
|
r"à",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 … that was interpreted as ISO 8859-1
|
( # Fix UTF-8 … that was interpreted as ISO 8859-1
|
||||||
re.compile("…"),
|
compile("…"),
|
||||||
r"…",
|
r"…",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 “ that was interpreted as ISO 8859-1
|
( # Fix UTF-8 “ that was interpreted as ISO 8859-1
|
||||||
re.compile("“"),
|
compile("“"),
|
||||||
r"“",
|
r"“",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 ” that was interpreted as ISO 8859-1
|
( # Fix UTF-8 ” that was interpreted as ISO 8859-1
|
||||||
re.compile("â€\u009d"),
|
compile("â€\u009d"),
|
||||||
r"”",
|
r"”",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 – that was interpreted as ISO 8859-1
|
( # Fix UTF-8 – that was interpreted as ISO 8859-1
|
||||||
re.compile("–"),
|
compile("–"),
|
||||||
r"–",
|
r"–",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 — that was interpreted as ISO 8859-1
|
( # Fix UTF-8 — that was interpreted as ISO 8859-1
|
||||||
re.compile("—"),
|
compile("—"),
|
||||||
r"—",
|
r"—",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 − that was interpreted as ISO 8859-1
|
( # Fix UTF-8 − that was interpreted as ISO 8859-1
|
||||||
re.compile("â€\u0090"),
|
compile("â€\u0090"),
|
||||||
r"−",
|
r"−",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 • that was interpreted as ISO 8859-1
|
( # Fix UTF-8 • that was interpreted as ISO 8859-1
|
||||||
re.compile("•"),
|
compile("•"),
|
||||||
r"•",
|
r"•",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 ç that was interpreted as ISO 8859-1
|
( # Fix UTF-8 ç that was interpreted as ISO 8859-1
|
||||||
re.compile("ç"),
|
compile("ç"),
|
||||||
r"ç",
|
r"ç",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 í that was interpreted as ISO 8859-1
|
( # Fix UTF-8 í that was interpreted as ISO 8859-1
|
||||||
re.compile("iÌ\u0081"),
|
compile("iÌ\u0081"),
|
||||||
r"í",
|
r"í",
|
||||||
),
|
),
|
||||||
# WARNING not sure
|
# WARNING not sure
|
||||||
( # Fix UTF-8 é that was interpreted as ISO 8859-1
|
( # Fix UTF-8 é that was interpreted as ISO 8859-1
|
||||||
re.compile("eÌ "),
|
compile("eÌ "),
|
||||||
r"é",
|
r"é",
|
||||||
),
|
),
|
||||||
( # Fix UTF-8 † that was interpreted as ISO 8859-1
|
( # Fix UTF-8 † that was interpreted as ISO 8859-1
|
||||||
re.compile("†"),
|
compile("†"),
|
||||||
r"† ",
|
r"† ",
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
## WARNING unknown broken encoding
|
## WARNING unknown broken encoding
|
||||||
unknownIso = (re.compile(r"\w*
.*\r?\n"),) # unknown 
 + surroundings
|
unknownIso = (compile(r"\w*
.*\r?\n"),) # unknown 
 + surroundings
|
||||||
|
|
||||||
|
|
||||||
def convert(markup):
|
def convertBody(spipBody):
|
||||||
|
text = spipBody
|
||||||
for spip, markdown in spipToMarkdown:
|
for spip, markdown in spipToMarkdown:
|
||||||
markup = spip.sub(markdown, markup)
|
text = spip.sub(markdown, text)
|
||||||
for iso, utf in isoToUtf:
|
for iso, utf in isoToUtf:
|
||||||
markup = iso.sub(utf, markup)
|
text = iso.sub(utf, text)
|
||||||
for iso in unknownIso:
|
for iso in unknownIso:
|
||||||
for match in iso.finditer(markup):
|
for match in iso.finditer(text):
|
||||||
print(f" UNKNOWN CHARACTER {match.group()}")
|
print(f" UNKNOWN CHARACTER {match.group()}")
|
||||||
return markup
|
return text
|
||||||
|
|
||||||
|
|
||||||
def convertMeta(markup):
|
def convertMeta(spipMeta):
|
||||||
for spip, metadata in spipToMetadata:
|
text = spipMeta
|
||||||
markup = spip.sub(metadata, markup)
|
for spip, metadata in spipToText:
|
||||||
|
text = spip.sub(metadata, text)
|
||||||
for iso, utf in isoToUtf:
|
for iso, utf in isoToUtf:
|
||||||
markup = iso.sub(utf, markup)
|
text = iso.sub(utf, text)
|
||||||
for iso in unknownIso:
|
for iso in unknownIso:
|
||||||
for match in iso.finditer(markup):
|
for match in iso.finditer(text):
|
||||||
print(f" UNKNOWN CHARACTER {match.group()}")
|
print(f" UNKNOWN CHARACTER {match.group()}")
|
||||||
return markup
|
return text
|
@ -1,21 +1,13 @@
|
|||||||
from config import CONFIG
|
from peewee import (SQL, BigAutoField, BigIntegerField, CharField,
|
||||||
from peewee import *
|
CompositeKey, DateField, DateTimeField, FloatField,
|
||||||
|
IntegerField, Model, MySQLDatabase, TextField)
|
||||||
|
|
||||||
db = MySQLDatabase(
|
# class UnknownField(object):
|
||||||
CONFIG["db"],
|
# def __init__(self, *_, **__):
|
||||||
**{
|
# pass
|
||||||
"charset": "utf8",
|
|
||||||
"sql_mode": "PIPES_AS_CONCAT",
|
|
||||||
"use_unicode": True,
|
|
||||||
"user": CONFIG["dbUser"],
|
|
||||||
"password": CONFIG["dbPass"],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class UnknownField(object):
|
db = MySQLDatabase(None)
|
||||||
def __init__(self, *_, **__):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class BaseModel(Model):
|
class BaseModel(Model):
|
@ -1,10 +1,11 @@
|
|||||||
import yaml
|
from converter import convertBody, convertMeta
|
||||||
from convert import convertMeta
|
from database import *
|
||||||
from slugify import slugify
|
from slugify import slugify
|
||||||
from SpipDatabase import *
|
# from yaml import CDumper as Dumper
|
||||||
|
from yaml import dump
|
||||||
|
|
||||||
|
|
||||||
class metadata:
|
class Article:
|
||||||
def __init__(self, article):
|
def __init__(self, article):
|
||||||
self.id = article.id_article
|
self.id = article.id_article
|
||||||
# self.surtitle = article.surtitre # Probably unused
|
# self.surtitle = article.surtitre # Probably unused
|
||||||
@ -13,6 +14,7 @@ class metadata:
|
|||||||
# self.section = article.id_rubrique # TODO join
|
# self.section = article.id_rubrique # TODO join
|
||||||
self.description = convertMeta(article.descriptif)
|
self.description = convertMeta(article.descriptif)
|
||||||
self.caption = article.chapo # Probably unused
|
self.caption = article.chapo # Probably unused
|
||||||
|
self.text = convertBody(article.texte) # Markdown
|
||||||
self.ps = article.ps # Probably unused
|
self.ps = article.ps # Probably unused
|
||||||
self.publicationDate = article.date
|
self.publicationDate = article.date
|
||||||
self.draft = False if article.statut == "publie" else True
|
self.draft = False if article.statut == "publie" else True
|
||||||
@ -37,12 +39,15 @@ class metadata:
|
|||||||
def get_slug(self):
|
def get_slug(self):
|
||||||
return slugify(f"{self.id}-{self.title}")
|
return slugify(f"{self.id}-{self.title}")
|
||||||
|
|
||||||
|
def get_path(self):
    """Return the relative output path for this article.

    Currently this is simply the slug produced by ``get_slug()``.
    """
    return self.get_slug()
|
||||||
|
|
||||||
def get_authors(self):
|
def get_authors(self):
|
||||||
return SpipAuteursLiens.select().where(SpipAuteursLiens.id_objet == self.id)
|
return SpipAuteursLiens.select().where(SpipAuteursLiens.id_objet == self.id)
|
||||||
|
|
||||||
def get_frontmatter(self):
|
def get_frontmatter(self):
|
||||||
return "---\n{}---".format(
|
return "---\n{}---".format(
|
||||||
yaml.dump(
|
dump(
|
||||||
{
|
{
|
||||||
"lang": self.lang,
|
"lang": self.lang,
|
||||||
"title": self.title,
|
"title": self.title,
|
||||||
@ -78,3 +83,37 @@ class metadata:
|
|||||||
if len(self.microblog) > 0
|
if len(self.microblog) > 0
|
||||||
else ""
|
else ""
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def get_article(self):
    """Return the complete text of the exported article.

    The result is the front matter, the starting block, the converted body
    (``self.text``) and the ending block, joined by single newlines in that
    order.
    """
    return "{}\n{}\n{}\n{}".format(
        self.get_frontmatter(),
        self.get_starting(),
        self.text,
        self.get_ending(),
    )
|
||||||
|
|
||||||
|
|
||||||
|
class Articles:
    """Iterator over SPIP articles, most recently published first.

    Each iteration step yields a ``(counter, article)`` tuple:

    * ``counter`` is a dict with the keys ``"exported"`` (number of articles
      yielded so far, including the current one) and ``"remaining"`` (number
      of articles still to be yielded after the current one);
    * ``article`` is an ``Article`` built from the corresponding database row.

    The object is its own iterator, so it can only be consumed once.
    """

    # Class-level default; every instance gets its own counter in __init__.
    exported: int = 0

    def __init__(self, maxToExport) -> None:
        """Select at most ``maxToExport`` articles, sorted by publication date."""
        # Query the DB to retrieve all articles sorted by publication date
        self.articles = (
            SpipArticles.select().order_by(SpipArticles.date.desc()).limit(maxToExport)
        )
        # Number of rows actually selected (may be lower than maxToExport)
        self.toExport: int = len(self.articles)
        # Explicit per-instance state instead of relying on the class attribute
        self.exported = 0

    def remaining(self):
        """Return how many articles have not been yielded yet."""
        return self.toExport - self.exported

    def __iter__(self):
        return self

    def __next__(self):
        if self.remaining() <= 0:
            raise StopIteration
        # Index of the row to yield, taken before the counter is advanced
        index = self.exported
        self.exported += 1
        return (
            {"exported": self.exported, "remaining": self.remaining()},
            Article(self.articles[index]),
        )
|
50
spip2md/main.py
Executable file
50
spip2md/main.py
Executable file
@ -0,0 +1,50 @@
|
|||||||
|
#!python
"""Export SPIP articles from a MySQL database to Markdown & YAML files.

Usage: ``main.py [maxToExport]`` -- the optional first CLI parameter is the
maximum number of articles to export; it defaults to
``config.defaultNbToExport``.
"""
import sys
from os import mkdir
from shutil import rmtree

from config import config
from database import db
from iterator import Articles

# Terminal escape sequences used to stylize output
R = "\033[91m"
G = "\033[92m"
B = "\033[94m"
BOLD = "\033[1m"
UNDERLINE = "\033[4m"
RESET = "\033[0m"


def main():
    """Recreate the output directory and write one ``index.md`` per article."""
    # Define max nb of articles to export based on first CLI param.
    # Parsed first so an invalid argument fails before the output directory
    # is wiped or the database is contacted.
    if len(sys.argv) > 1:
        maxToExport = int(sys.argv[1])
    else:
        maxToExport = config.defaultNbToExport

    # Clean the output dir & create a new one
    rmtree(config.outputDir, True)
    mkdir(config.outputDir)

    # Connect to the MySQL database with Peewee ORM
    db.init(config.db, host=config.dbHost, user=config.dbUser, password=config.dbPass)
    db.connect()
    try:
        # Loop among first maxToExport articles & export them
        for counter, article in Articles(maxToExport):
            # Print a header before the first article and then every 100 articles
            if (counter["exported"] - 1) % 100 == 0:
                print(
                    f"\n{BOLD}Exporting {R}{counter['remaining']+1}{RESET}"
                    + f"{BOLD} SPIP articles to Markdown & YAML files{RESET}\n"
                )
            print(f"{BOLD}{counter['exported']}.{RESET} {article.title}")
            fullPath = config.outputDir + "/" + article.get_path()
            print(f"\t-> {fullPath}/index.md")
            mkdir(fullPath)
            # Explicit UTF-8 so accented characters do not depend on the locale
            with open(fullPath + "/index.md", "w", encoding="utf-8") as f:
                f.write(article.get_article())
    finally:
        # Close the database connection, even if an export step failed
        db.close()


# Run the export only when executed as a script; importing has no side effects
if __name__ == "__main__":
    main()
|
@ -1,65 +0,0 @@
|
|||||||
#!python
|
|
||||||
import sys
|
|
||||||
from os import mkdir
|
|
||||||
from shutil import rmtree
|
|
||||||
|
|
||||||
from config import CONFIG
|
|
||||||
from convert import convert
|
|
||||||
from Metadata import metadata
|
|
||||||
from SpipDatabase import *
|
|
||||||
|
|
||||||
# Clean the output dir & create a new
|
|
||||||
rmtree(CONFIG["outputDir"], True)
|
|
||||||
mkdir(CONFIG["outputDir"])
|
|
||||||
|
|
||||||
# Connect to the MySQL database with Peewee ORM
|
|
||||||
db.connect()
|
|
||||||
|
|
||||||
# Query the DB to retrieve all articles sorted by publication date
|
|
||||||
articles = SpipArticles.select().order_by(SpipArticles.date.desc())
|
|
||||||
# Query the DB to retrieve all articles sorted by modification date
|
|
||||||
# articles = SpipArticles.select().order_by(SpipArticles.date_modif.desc())
|
|
||||||
|
|
||||||
# Choose how many articles to export based on first param
|
|
||||||
if len(sys.argv) > 1:
|
|
||||||
if int(sys.argv[1]) > 0 and int(sys.argv[1]) < len(articles):
|
|
||||||
nbToExport = int(sys.argv[1])
|
|
||||||
else:
|
|
||||||
nbToExport = len(articles)
|
|
||||||
else:
|
|
||||||
if len(articles) > CONFIG["maxExportNb"]:
|
|
||||||
nbToExport = CONFIG["maxExportNb"]
|
|
||||||
else:
|
|
||||||
nbToExport = len(articles)
|
|
||||||
|
|
||||||
print(f"--- Export of {nbToExport} SPIP articles to Markdown & YAML files ---\n")
|
|
||||||
|
|
||||||
# Loop among every articles & export them in Markdown files
|
|
||||||
for exported in range(nbToExport):
|
|
||||||
if exported > 0 and exported % 10 == 0:
|
|
||||||
print(f"\n--- {nbToExport - exported} articles remaining ---\n")
|
|
||||||
article = articles[exported]
|
|
||||||
meta = metadata(article)
|
|
||||||
|
|
||||||
print(f"{exported+1}. Exporting {meta.title}")
|
|
||||||
print(f" to {meta.get_slug()}/index.md")
|
|
||||||
articleDir = "{}/{}".format(CONFIG["outputDir"], meta.get_slug())
|
|
||||||
|
|
||||||
mkdir(articleDir)
|
|
||||||
with open("{}/index.md".format(articleDir), "w") as f:
|
|
||||||
f.write(
|
|
||||||
"{}\n{}\n{}\n{}".format(
|
|
||||||
meta.get_frontmatter(),
|
|
||||||
meta.get_starting(),
|
|
||||||
convert(article.texte),
|
|
||||||
meta.get_ending(),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Close the database connection
|
|
||||||
db.close()
|
|
||||||
|
|
||||||
# Announce the end of the script
|
|
||||||
print(
|
|
||||||
f"\n--- Exported {nbToExport} SPIP articles to ./{CONFIG['outputDir']}/*/index.md ---"
|
|
||||||
)
|
|
Loading…
Reference in New Issue
Block a user