PEP8 refactor
This commit is contained in:
parent
aa1b822688
commit
e1c8bd4b2e
@ -1,23 +1,25 @@
|
|||||||
# pyright: basic
|
# pyright: basic
|
||||||
from re import finditer
|
from re import finditer
|
||||||
|
|
||||||
from converter import convertBody, convertMeta, unknownIso
|
|
||||||
from database import *
|
|
||||||
from slugify import slugify
|
from slugify import slugify
|
||||||
# from yaml import CDumper as Dumper
|
|
||||||
from yaml import dump
|
from yaml import dump
|
||||||
|
|
||||||
|
from converter import convert_body, convert_meta, unknown_iso
|
||||||
|
from database import SpipArticles, SpipAuteurs, SpipAuteursLiens, SpipRubriques
|
||||||
|
|
||||||
|
# from yaml import CDumper as Dumper
|
||||||
|
|
||||||
|
|
||||||
class Article:
|
class Article:
|
||||||
def __init__(self, article):
|
def __init__(self, article):
|
||||||
self.id: int = article.id_article
|
self.id: int = article.id_article
|
||||||
# self.surtitle = article.surtitre # Probably unused
|
# self.surtitle = article.surtitre # Probably unused
|
||||||
self.title: str = convertMeta(article.titre)
|
self.title: str = convert_meta(article.titre)
|
||||||
self.subtitle: str = article.soustitre # Probably unused
|
self.subtitle: str = article.soustitre # Probably unused
|
||||||
self.section_id: int = article.id_rubrique
|
self.section_id: int = article.id_rubrique
|
||||||
self.description: str = convertMeta(article.descriptif)
|
self.description: str = convert_meta(article.descriptif)
|
||||||
self.caption: str = article.chapo # Probably unused
|
self.caption: str = article.chapo # Probably unused
|
||||||
self.text: str = convertBody(article.texte) # Markdown
|
self.text: str = convert_body(article.texte) # Markdown
|
||||||
self.ps: str = article.ps # Probably unused
|
self.ps: str = article.ps # Probably unused
|
||||||
self.publicationDate: str = article.date
|
self.publicationDate: str = article.date
|
||||||
self.draft: bool = False if article.statut == "publie" else True
|
self.draft: bool = False if article.statut == "publie" else True
|
||||||
@ -39,22 +41,22 @@ class Article:
|
|||||||
self.virtual: str = article.virtuel # TODO Why ?
|
self.virtual: str = article.virtuel # TODO Why ?
|
||||||
self.microblog: str = article.microblog # Probably unused
|
self.microblog: str = article.microblog # Probably unused
|
||||||
|
|
||||||
def get_section(self) -> str:
    """Return the title of the section this article belongs to.

    Selects the ``SpipRubriques`` rows whose ``id_rubrique`` equals
    ``self.section_id``, takes the first one and runs its ``titre``
    through ``convert_meta``.

    NOTE(review): the first row is indexed without a guard, so an article
    pointing at a missing section will fail here — confirm this cannot
    happen in the source database.
    """
    matching_sections = SpipRubriques.select().where(
        SpipRubriques.id_rubrique == self.section_id
    )
    section = matching_sections[0]
    return convert_meta(section.titre)
|
||||||
|
|
||||||
def get_path(self) -> str:
    """Return the relative output directory of this article.

    The path has the form ``<section-slug>/<id>-<title-slug>/`` and always
    ends with a slash, so a file name can be appended directly.
    """
    section_slug = slugify(self.get_section())
    article_slug = slugify(f"{self.id}-{self.title}")
    return f"{section_slug}/{article_slug}/"
|
||||||
|
|
||||||
def get_filename(self) -> str:
    """Return the name of the file the article is written to.

    The name is constant; it is appended to the directory returned by
    ``get_path`` when the article is exported.
    """
    filename: str = "index.fr.md"
    return filename
|
||||||
|
|
||||||
def getAuthors(self) -> tuple:
|
def get_authors(self) -> tuple:
|
||||||
return (
|
return (
|
||||||
SpipAuteurs.select()
|
SpipAuteurs.select()
|
||||||
.join(
|
.join(
|
||||||
@ -64,7 +66,7 @@ class Article:
|
|||||||
.where(SpipAuteursLiens.id_objet == self.id)
|
.where(SpipAuteursLiens.id_objet == self.id)
|
||||||
)
|
)
|
||||||
|
|
||||||
def getFrontmatter(self) -> str:
|
def get_frontmatter(self) -> str:
|
||||||
return dump(
|
return dump(
|
||||||
{
|
{
|
||||||
"lang": self.lang,
|
"lang": self.lang,
|
||||||
@ -75,14 +77,14 @@ class Article:
|
|||||||
"lastmod": self.update,
|
"lastmod": self.update,
|
||||||
"draft": self.draft,
|
"draft": self.draft,
|
||||||
"description": self.description,
|
"description": self.description,
|
||||||
"authors": [author.nom for author in self.getAuthors()],
|
"authors": [author.nom for author in self.get_authors()],
|
||||||
},
|
},
|
||||||
allow_unicode=True,
|
allow_unicode=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
def getArticle(self) -> str:
|
def get_article(self) -> str:
|
||||||
# Build the final article text
|
# Build the final article text
|
||||||
article: str = "---\n" + self.getFrontmatter() + "---"
|
article: str = "---\n" + self.get_frontmatter() + "---"
|
||||||
# If there is a caption, add the caption followed by a hr
|
# If there is a caption, add the caption followed by a hr
|
||||||
if len(self.caption) > 0:
|
if len(self.caption) > 0:
|
||||||
article += "\n\n" + self.caption + "\n\n***"
|
article += "\n\n" + self.caption + "\n\n***"
|
||||||
@ -90,7 +92,7 @@ class Article:
|
|||||||
if len(self.text) > 0:
|
if len(self.text) > 0:
|
||||||
article += "\n\n" + self.text
|
article += "\n\n" + self.text
|
||||||
# Same with an "extra" section
|
# Same with an "extra" section
|
||||||
if self.extra != None and len(self.extra) > 0:
|
if self.extra is not None and len(self.extra) > 0:
|
||||||
article += "\n\n# EXTRA\n\n" + self.extra
|
article += "\n\n# EXTRA\n\n" + self.extra
|
||||||
# PS
|
# PS
|
||||||
if len(self.ps) > 0:
|
if len(self.ps) > 0:
|
||||||
@ -100,10 +102,10 @@ class Article:
|
|||||||
article += "\n\n# MICROBLOGGING\n\n" + self.microblog
|
article += "\n\n# MICROBLOGGING\n\n" + self.microblog
|
||||||
return article
|
return article
|
||||||
|
|
||||||
def get_unknown_chars(self) -> list[str]:
    """Collect the text fragments that contain unrepairable characters.

    For the title and then the body, each entry of ``unknown_iso`` is
    searched as a regular expression; every hit is reported together with
    the rest of its line (up to, but excluding, the line break).

    Returns:
        The matched fragments, ordered by text, then by entry of
        ``unknown_iso``, then by position. Empty when nothing was found.
    """
    return [
        found.group()
        for text in (self.title, self.text)
        for char in unknown_iso
        for found in finditer(char + r".*(?=\r?\n|$)", text)
    ]
|
||||||
@ -112,10 +114,10 @@ class Article:
|
|||||||
class Articles:
|
class Articles:
|
||||||
exported: int = 0
|
exported: int = 0
|
||||||
|
|
||||||
def __init__(self, maxexport: int) -> None:
    """Select the articles to export.

    Args:
        maxexport: Upper bound on the number of articles selected.
    """
    # Most recently published first, capped at maxexport rows
    newest_first = SpipArticles.select().order_by(SpipArticles.date.desc())
    self.articles = newest_first.limit(maxexport)
    # Number of articles actually selected (may be lower than maxexport)
    self.toExport: int = len(self.articles)
|
||||||
|
|
||||||
|
@ -4,36 +4,36 @@ from os.path import isfile
|
|||||||
from yaml import CLoader as Loader
|
from yaml import CLoader as Loader
|
||||||
from yaml import load
|
from yaml import load
|
||||||
|
|
||||||
configPaths = ("spip2md.yml", "spip2md.yaml")
|
config_paths = ("spip2md.yml", "spip2md.yaml")
|
||||||
|
|
||||||
|
|
||||||
class Configuration:
|
class Configuration:
|
||||||
db = "spip"
|
db = "spip"
|
||||||
dbHost = "localhost"
|
db_host = "localhost"
|
||||||
dbUser = "spip"
|
db_user = "spip"
|
||||||
dbPass = "password"
|
db_pass = "password"
|
||||||
outputDir = "output"
|
output_dir = "output"
|
||||||
defaultNbToExport = 1000
|
default_export_nb = 1000
|
||||||
|
|
||||||
def __init__(self, configFile: str | None = None) -> None:
|
def __init__(self, config_file: str | None = None) -> None:
|
||||||
if configFile != None:
|
if config_file is not None:
|
||||||
with open(configFile) as f:
|
with open(config_file) as f:
|
||||||
config = load(f.read(), Loader=Loader)
|
config = load(f.read(), Loader=Loader)
|
||||||
if "db" in config:
|
if "db" in config:
|
||||||
self.db = config["db"]
|
self.db = config["db"]
|
||||||
if "dbUser" in config:
|
if "db_user" in config:
|
||||||
self.dbUser = config["dbUser"]
|
self.db_user = config["db_user"]
|
||||||
if "dbPass" in config:
|
if "db_pass" in config:
|
||||||
self.dbPass = config["dbPass"]
|
self.db_pass = config["db_pass"]
|
||||||
if "outputDir" in config:
|
if "output_dir" in config:
|
||||||
self.outputDir = config["outputDir"]
|
self.output_dir = config["output_dir"]
|
||||||
if "defaultNbToExport" in config:
|
if "default_export_nb" in config:
|
||||||
self.defaultNbToExport = config["defaultNbToExport"]
|
self.default_export_nb = config["default_export_nb"]
|
||||||
|
|
||||||
|
|
||||||
# Use the first candidate file that exists; with none, keep the defaults
existing_path = next(
    (candidate for candidate in config_paths if isfile(candidate)), None
)
config = Configuration(existing_path)
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
from re import I, S, compile, finditer
|
from re import I, S, compile, finditer
|
||||||
|
|
||||||
# SPIP syntax to Markdown
|
# SPIP syntax to Markdown
|
||||||
spipToMarkdown = (
|
spip_to_markdown = (
|
||||||
( # horizontal rule
|
( # horizontal rule
|
||||||
compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", S | I),
|
compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", S | I),
|
||||||
# r"---",
|
# r"---",
|
||||||
@ -114,7 +114,7 @@ spipToMarkdown = (
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
spipToText = (
|
spip_to_text = (
|
||||||
( # strong
|
( # strong
|
||||||
compile(r"\{\{ *(.*?) *\}\}", S | I),
|
compile(r"\{\{ *(.*?) *\}\}", S | I),
|
||||||
r"\1",
|
r"\1",
|
||||||
@ -159,7 +159,7 @@ spipToText = (
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
isoToUtf = (
|
iso_to_utf = (
|
||||||
# Broken encoding
|
# Broken encoding
|
||||||
( # Fix UTF-8 appostrophe that was interpreted as ISO 8859-1
|
( # Fix UTF-8 apostrophe that was interpreted as ISO 8859-1
|
||||||
"’",
|
"’",
|
||||||
@ -253,44 +253,47 @@ isoToUtf = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
## WARNING unknown broken encoding
|
## WARNING unknown broken encoding
|
||||||
unknownIso = (
|
unknown_iso = (
|
||||||
r"
", # unknown 

|
r"
", # unknown 

|
||||||
r"∆", # unknown â^†
|
r"∆", # unknown â^†
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Define terminal escape sequences to stylize output, regex escaped
|
||||||
|
RED: str = "\033[91m"
|
||||||
|
BOLD: str = "\033[1m"
|
||||||
|
RESET: str = "\033[0m"
|
||||||
|
|
||||||
def convert_body(text: str) -> str:
    """Convert the body of a SPIP article to Markdown.

    Every ``(compiled pattern, replacement)`` pair of ``spip_to_markdown``
    is applied in order, then every ``(broken, fixed)`` pair of
    ``iso_to_utf`` is substituted literally.

    Args:
        text: Article body in SPIP syntax.

    Returns:
        The converted text.
    """
    for spip, markdown in spip_to_markdown:
        text = spip.sub(markdown, text)
    for iso, utf in iso_to_utf:
        # str.replace returns a new string: rebind it, or the fix is lost
        text = text.replace(iso, utf)
    return text
|
||||||
|
|
||||||
|
|
||||||
def convert_meta(text: str) -> str:
    """Convert a SPIP metadata field (title, description…) to plain text.

    Every ``(compiled pattern, replacement)`` pair of ``spip_to_text`` is
    applied in order, then every ``(broken, fixed)`` pair of ``iso_to_utf``
    is substituted literally.

    Args:
        text: Metadata value in SPIP syntax.

    Returns:
        The converted text.
    """
    for spip, metadata in spip_to_text:
        text = spip.sub(metadata, text)
    for iso, utf in iso_to_utf:
        # str.replace returns a new string: rebind it, or the fix is lost
        text = text.replace(iso, utf)
    return text
|
||||||
|
|
||||||
|
|
||||||
def remove_unknown_chars(text: str) -> str:
    """Strip every sequence listed in ``unknown_iso`` from ``text``.

    Args:
        text: Text that may contain unrepairable characters.

    Returns:
        The text with each listed sequence removed.
    """
    for char in unknown_iso:
        # str.replace returns a new string: rebind it, or nothing is removed
        text = text.replace(char, "")
    return text
|
||||||
|
|
||||||
|
|
||||||
def highlightUnknownChars(text: str) -> str:
|
def highlight_unknown_chars(text: str) -> str:
|
||||||
# Define terminal escape sequences to stylize output, regex escaped
|
|
||||||
COLOR: str = "\033[91m" + "\033[1m" # Red + Bold
|
|
||||||
RESET: str = "\033[0m"
|
|
||||||
# Highlight in COLOR unknown chars in text
|
# Highlight unknown chars in text in RED + BOLD
|
||||||
for char in unknownIso:
|
for char in unknown_iso:
|
||||||
for match in finditer(char, text):
|
for match in finditer(char, text):
|
||||||
text = (
|
text = (
|
||||||
text[: match.start()]
|
text[: match.start()]
|
||||||
+ COLOR
|
+ RED
|
||||||
|
+ BOLD
|
||||||
+ match.group()
|
+ match.group()
|
||||||
+ RESET
|
+ RESET
|
||||||
+ text[match.end() :]
|
+ text[match.end() :]
|
||||||
|
@ -1,7 +1,18 @@
|
|||||||
# pyright: basic
|
# pyright: basic
|
||||||
from peewee import (SQL, BigAutoField, BigIntegerField, CharField,
|
from peewee import (
|
||||||
CompositeKey, DateField, DateTimeField, FloatField,
|
SQL,
|
||||||
IntegerField, Model, MySQLDatabase, TextField)
|
BigAutoField,
|
||||||
|
BigIntegerField,
|
||||||
|
CharField,
|
||||||
|
CompositeKey,
|
||||||
|
DateField,
|
||||||
|
DateTimeField,
|
||||||
|
FloatField,
|
||||||
|
IntegerField,
|
||||||
|
Model,
|
||||||
|
MySQLDatabase,
|
||||||
|
TextField,
|
||||||
|
)
|
||||||
|
|
||||||
# class UnknownField(object):
|
# class UnknownField(object):
|
||||||
# def __init__(self, *_, **__):
|
# def __init__(self, *_, **__):
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
# pyright: basic
|
# pyright: basic
|
||||||
from articles import Article, Articles
|
from articles import Article, Articles
|
||||||
from config import config
|
from config import config
|
||||||
from converter import highlightUnknownChars
|
from converter import highlight_unknown_chars
|
||||||
from database import db
|
from database import db
|
||||||
|
|
||||||
if __name__ != "__main__":
|
if __name__ != "__main__":
|
||||||
@ -13,18 +13,18 @@ from os import makedirs, mkdir
|
|||||||
from shutil import rmtree
|
from shutil import rmtree
|
||||||
|
|
||||||
# Clean the output dir & create a new
|
# Clean the output dir & create a new
|
||||||
rmtree(config.outputDir, True)
|
rmtree(config.output_dir, True)
|
||||||
mkdir(config.outputDir)
|
mkdir(config.output_dir)
|
||||||
|
|
||||||
# Connect to the MySQL database with Peewee ORM
|
# Connect to the MySQL database with Peewee ORM
|
||||||
db.init(config.db, host=config.dbHost, user=config.dbUser, password=config.dbPass)
|
db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
|
||||||
db.connect()
|
db.connect()
|
||||||
|
|
||||||
# Define max nb of articles to export based on first CLI param
|
# Define max nb of articles to export based on first CLI param
|
||||||
if len(sys.argv) > 1:
|
if len(sys.argv) > 1:
|
||||||
maxToExport = int(sys.argv[1])
|
maxexport = int(sys.argv[1])
|
||||||
else:
|
else:
|
||||||
maxToExport = config.defaultNbToExport
|
maxexport = config.default_export_nb
|
||||||
|
|
||||||
# Define terminal escape sequences to stylize output
|
# Define terminal escape sequences to stylize output
|
||||||
R: str = "\033[91m"
|
R: str = "\033[91m"
|
||||||
@ -34,38 +34,38 @@ BOLD: str = "\033[1m"
|
|||||||
RESET: str = "\033[0m"
|
RESET: str = "\033[0m"
|
||||||
|
|
||||||
# Articles that contains unknown chars
|
# Articles that contain unknown chars
|
||||||
unknownCharsArticles: list[Article] = []
|
unknown_chars_articles: list[Article] = []
|
||||||
|
|
||||||
# Loop among first maxToExport articles & export them
|
# Loop over the first maxexport articles & export them
|
||||||
for counter, article in Articles(maxToExport):
|
for counter, article in Articles(maxexport):
|
||||||
if (counter["exported"] - 1) % 100 == 0:
|
if (counter["exported"] - 1) % 100 == 0:
|
||||||
print(
|
print(
|
||||||
f"\n{BOLD}Exporting {R}{counter['remaining']+1}{RESET}"
|
f"\n{BOLD}Exporting {R}{counter['remaining']+1}{RESET}"
|
||||||
+ f"{BOLD} SPIP articles to Markdown & YAML files{RESET}\n"
|
+ f"{BOLD} SPIP articles to Markdown & YAML files{RESET}\n"
|
||||||
)
|
)
|
||||||
print(
|
print(
|
||||||
f"{BOLD}{counter['exported']}.{RESET} " + highlightUnknownChars(article.title)
|
f"{BOLD}{counter['exported']}.{RESET} " + highlight_unknown_chars(article.title)
|
||||||
)
|
)
|
||||||
fullPath: str = config.outputDir + "/" + article.getPath()
|
fullpath: str = config.output_dir + "/" + article.get_path()
|
||||||
print(f"{BOLD}>{RESET} {fullPath}{article.getFilename()}")
|
print(f"{BOLD}>{RESET} {fullpath}{article.get_filename()}")
|
||||||
makedirs(fullPath, exist_ok=True)
|
makedirs(fullpath, exist_ok=True)
|
||||||
with open(fullPath + article.getFilename(), "w") as f:
|
with open(fullpath + article.get_filename(), "w") as f:
|
||||||
f.write(article.getArticle())
|
f.write(article.get_article())
|
||||||
# Store detected unknown characters
|
# Store detected unknown characters
|
||||||
if len(article.getUnknownChars()) > 0:
|
if len(article.get_unknown_chars()) > 0:
|
||||||
unknownCharsArticles.append(article)
|
unknown_chars_articles.append(article)
|
||||||
|
|
||||||
for article in unknownCharsArticles:
|
for article in unknown_chars_articles:
|
||||||
unknownCharsApparitions: list = article.getUnknownChars()
|
unknown_chars_apparitions: list = article.get_unknown_chars()
|
||||||
nb: int = len(unknownCharsApparitions)
|
nb: int = len(unknown_chars_apparitions)
|
||||||
s: str = "s" if nb > 1 else ""
|
s: str = "s" if nb > 1 else ""
|
||||||
print(
|
print(
|
||||||
f"\n{BOLD}{nb}{RESET} unknown character{s} "
|
f"\n{BOLD}{nb}{RESET} unknown character{s} "
|
||||||
+ f"detected in article {BOLD}{article.id}{RESET}"
|
+ f"detected in article {BOLD}{article.id}{RESET}"
|
||||||
+ f"\n{BOLD}·{RESET} "
|
+ f"\n{BOLD}·{RESET} "
|
||||||
+ highlightUnknownChars(article.title)
|
+ highlight_unknown_chars(article.title)
|
||||||
)
|
)
|
||||||
for text in unknownCharsApparitions:
|
for text in unknown_chars_apparitions:
|
||||||
print(f" {BOLD}…{RESET} " + highlightUnknownChars(text))
|
print(f" {BOLD}…{RESET} " + highlight_unknown_chars(text))
|
||||||
|
|
||||||
db.close() # Close the database connection
|
db.close() # Close the database connection
|
||||||
|
Loading…
Reference in New Issue
Block a user