PEP8 refactor

This commit is contained in:
Guilhem Fauré 2023-05-16 15:55:51 +02:00
parent aa1b822688
commit e1c8bd4b2e
5 changed files with 98 additions and 82 deletions

View File

@ -1,23 +1,25 @@
# pyright: basic
from re import finditer
from converter import convertBody, convertMeta, unknownIso
from database import *
from slugify import slugify
# from yaml import CDumper as Dumper
from yaml import dump
from converter import convert_body, convert_meta, unknown_iso
from database import SpipArticles, SpipAuteurs, SpipAuteursLiens, SpipRubriques
# from yaml import CDumper as Dumper
class Article:
def __init__(self, article):
self.id: int = article.id_article
# self.surtitle = article.surtitre # Probably unused
self.title: str = convertMeta(article.titre)
self.title: str = convert_meta(article.titre)
self.subtitle: str = article.soustitre # Probably unused
self.section_id: int = article.id_rubrique
self.description: str = convertMeta(article.descriptif)
self.description: str = convert_meta(article.descriptif)
self.caption: str = article.chapo # Probably unused
self.text: str = convertBody(article.texte) # Markdown
self.text: str = convert_body(article.texte) # Markdown
self.ps: str = article.ps # Probably unused
self.publicationDate: str = article.date
self.draft: bool = False if article.statut == "publie" else True
@ -39,22 +41,22 @@ class Article:
self.virtual: str = article.virtuel # TODO Why?
self.microblog: str = article.microblog # Probably unused
def getSection(self) -> str:
return convertMeta(
def get_section(self) -> str:
return convert_meta(
SpipRubriques.select()
.where(SpipRubriques.id_rubrique == self.section_id)[0]
.titre
)
def getPath(self) -> str:
def get_path(self) -> str:
return (
slugify(self.getSection()) + "/" + slugify(f"{self.id}-{self.title}") + "/"
slugify(self.get_section()) + "/" + slugify(f"{self.id}-{self.title}") + "/"
)
def getFilename(self) -> str:
def get_filename(self) -> str:
return "index.fr.md"
def getAuthors(self) -> tuple:
def get_authors(self) -> tuple:
return (
SpipAuteurs.select()
.join(
@ -64,7 +66,7 @@ class Article:
.where(SpipAuteursLiens.id_objet == self.id)
)
def getFrontmatter(self) -> str:
def get_frontmatter(self) -> str:
return dump(
{
"lang": self.lang,
@ -75,14 +77,14 @@ class Article:
"lastmod": self.update,
"draft": self.draft,
"description": self.description,
"authors": [author.nom for author in self.getAuthors()],
"authors": [author.nom for author in self.get_authors()],
},
allow_unicode=True,
)
def getArticle(self) -> str:
def get_article(self) -> str:
# Build the final article text
article: str = "---\n" + self.getFrontmatter() + "---"
article: str = "---\n" + self.get_frontmatter() + "---"
# If there is a caption, add the caption followed by a hr
if len(self.caption) > 0:
article += "\n\n" + self.caption + "\n\n***"
@ -90,7 +92,7 @@ class Article:
if len(self.text) > 0:
article += "\n\n" + self.text
# Same with an "extra" section
if self.extra != None and len(self.extra) > 0:
if self.extra is not None and len(self.extra) > 0:
article += "\n\n# EXTRA\n\n" + self.extra
# PS
if len(self.ps) > 0:
@ -100,10 +102,10 @@ class Article:
article += "\n\n# MICROBLOGGING\n\n" + self.microblog
return article
def getUnknownChars(self) -> list[str]:
def get_unknown_chars(self) -> list[str]:
errors: list[str] = []
for text in (self.title, self.text):
for char in unknownIso:
for char in unknown_iso:
for match in finditer(char + r".*(?=\r?\n|$)", text):
errors.append(match.group())
return errors
@ -112,10 +114,10 @@ class Article:
class Articles:
exported: int = 0
def __init__(self, maxToExport: int) -> None:
def __init__(self, maxexport: int) -> None:
# Query the DB to retrieve all articles sorted by publication date
self.articles = (
SpipArticles.select().order_by(SpipArticles.date.desc()).limit(maxToExport)
SpipArticles.select().order_by(SpipArticles.date.desc()).limit(maxexport)
)
self.toExport: int = len(self.articles)

View File

@ -4,36 +4,36 @@ from os.path import isfile
from yaml import CLoader as Loader
from yaml import load
configPaths = ("spip2md.yml", "spip2md.yaml")
config_paths = ("spip2md.yml", "spip2md.yaml")
class Configuration:
db = "spip"
dbHost = "localhost"
dbUser = "spip"
dbPass = "password"
outputDir = "output"
defaultNbToExport = 1000
db_host = "localhost"
db_user = "spip"
db_pass = "password"
output_dir = "output"
default_export_nb = 1000
def __init__(self, configFile: str | None = None) -> None:
if configFile != None:
with open(configFile) as f:
def __init__(self, config_file: str | None = None) -> None:
if config_file is not None:
with open(config_file) as f:
config = load(f.read(), Loader=Loader)
if "db" in config:
self.db = config["db"]
if "dbUser" in config:
self.dbUser = config["dbUser"]
if "dbPass" in config:
self.dbPass = config["dbPass"]
if "outputDir" in config:
self.outputDir = config["outputDir"]
if "defaultNbToExport" in config:
self.defaultNbToExport = config["defaultNbToExport"]
if "db_user" in config:
self.db_user = config["db_user"]
if "db_pass" in config:
self.db_pass = config["db_pass"]
if "output_dir" in config:
self.output_dir = config["output_dir"]
if "default_export_nb" in config:
self.default_export_nb = config["default_export_nb"]
config = Configuration()
for path in configPaths:
for path in config_paths:
if isfile(path):
config = Configuration(path)
break

View File

@ -2,7 +2,7 @@
from re import I, S, compile, finditer
# SPIP syntax to Markdown
spipToMarkdown = (
spip_to_markdown = (
( # horizontal rule
compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", S | I),
# r"---",
@ -114,7 +114,7 @@ spipToMarkdown = (
),
)
spipToText = (
spip_to_text = (
( # strong
compile(r"\{\{ *(.*?) *\}\}", S | I),
r"\1",
@ -159,7 +159,7 @@ spipToText = (
),
)
isoToUtf = (
iso_to_utf = (
# Broken encoding
( # Fix UTF-8 apostrophe that was interpreted as ISO 8859-1
"’",
@ -253,44 +253,47 @@ isoToUtf = (
)
## WARNING unknown broken encoding
unknownIso = (
unknown_iso = (
r"
", # unknown 

r"∆", # unknown â^†
)
# Define terminal escape sequences to stylize output, regex escaped
RED: str = "\033[91m"
BOLD: str = "\033[1m"
RESET: str = "\033[0m"
def convert_body(text: str) -> str:
    """Convert a SPIP article body to Markdown and repair broken encoding.

    Applies every (compiled pattern, replacement) pair of
    ``spip_to_markdown`` in order, then every (broken, fixed) string pair
    of ``iso_to_utf``.

    :param text: raw SPIP-formatted text.
    :return: the converted text.
    """
    for spip, markdown in spip_to_markdown:
        text = spip.sub(markdown, text)
    for iso, utf in iso_to_utf:
        # str.replace returns a new string; without rebinding, the
        # encoding fix was silently discarded.
        text = text.replace(iso, utf)
    return text
def convert_meta(text: str) -> str:
    """Convert SPIP markup in a metadata field and repair broken encoding.

    Applies every (compiled pattern, replacement) pair of ``spip_to_text``
    in order, then every (broken, fixed) string pair of ``iso_to_utf``.

    :param text: raw SPIP-formatted metadata (e.g. a title).
    :return: the converted text.
    """
    for spip, metadata in spip_to_text:
        text = spip.sub(metadata, text)
    for iso, utf in iso_to_utf:
        # str.replace returns a new string; without rebinding, the
        # encoding fix was silently discarded.
        text = text.replace(iso, utf)
    return text
def remove_unknown_chars(text: str) -> str:
    """Return ``text`` with every sequence listed in ``unknown_iso`` removed.

    :param text: text that may contain unrecoverable broken characters.
    :return: the cleaned text.
    """
    for char in unknown_iso:
        # Rebind the result: strings are immutable, so a bare
        # text.replace(...) left the input unchanged.
        text = text.replace(char, "")
    return text
def highlightUnknownChars(text: str) -> str:
# Define terminal escape sequences to stylize output, regex escaped
COLOR: str = "\033[91m" + "\033[1m" # Red + Bold
RESET: str = "\033[0m"
def highlight_unknown_chars(text: str) -> str:
# Highlight unknown chars in text in bold red
for char in unknownIso:
for char in unknown_iso:
for match in finditer(char, text):
text = (
text[: match.start()]
+ COLOR
+ RED
+ BOLD
+ match.group()
+ RESET
+ text[match.end() :]

View File

@ -1,7 +1,18 @@
# pyright: basic
from peewee import (SQL, BigAutoField, BigIntegerField, CharField,
CompositeKey, DateField, DateTimeField, FloatField,
IntegerField, Model, MySQLDatabase, TextField)
from peewee import (
SQL,
BigAutoField,
BigIntegerField,
CharField,
CompositeKey,
DateField,
DateTimeField,
FloatField,
IntegerField,
Model,
MySQLDatabase,
TextField,
)
# class UnknownField(object):
# def __init__(self, *_, **__):

View File

@ -2,7 +2,7 @@
# pyright: basic
from articles import Article, Articles
from config import config
from converter import highlightUnknownChars
from converter import highlight_unknown_chars
from database import db
if __name__ != "__main__":
@ -13,18 +13,18 @@ from os import makedirs, mkdir
from shutil import rmtree
# Clean the output dir & create a new
rmtree(config.outputDir, True)
mkdir(config.outputDir)
rmtree(config.output_dir, True)
mkdir(config.output_dir)
# Connect to the MySQL database with Peewee ORM
db.init(config.db, host=config.dbHost, user=config.dbUser, password=config.dbPass)
db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
db.connect()
# Define max nb of articles to export based on first CLI param
if len(sys.argv) > 1:
maxToExport = int(sys.argv[1])
maxexport = int(sys.argv[1])
else:
maxToExport = config.defaultNbToExport
maxexport = config.default_export_nb
# Define terminal escape sequences to stylize output
R: str = "\033[91m"
@ -34,38 +34,38 @@ BOLD: str = "\033[1m"
RESET: str = "\033[0m"
# Articles that contain unknown chars
unknownCharsArticles: list[Article] = []
unknown_chars_articles: list[Article] = []
# Loop over the first maxexport articles & export them
for counter, article in Articles(maxToExport):
for counter, article in Articles(maxexport):
if (counter["exported"] - 1) % 100 == 0:
print(
f"\n{BOLD}Exporting {R}{counter['remaining']+1}{RESET}"
+ f"{BOLD} SPIP articles to Markdown & YAML files{RESET}\n"
)
print(
f"{BOLD}{counter['exported']}.{RESET} " + highlightUnknownChars(article.title)
f"{BOLD}{counter['exported']}.{RESET} " + highlight_unknown_chars(article.title)
)
fullPath: str = config.outputDir + "/" + article.getPath()
print(f"{BOLD}>{RESET} {fullPath}{article.getFilename()}")
makedirs(fullPath, exist_ok=True)
with open(fullPath + article.getFilename(), "w") as f:
f.write(article.getArticle())
fullpath: str = config.output_dir + "/" + article.get_path()
print(f"{BOLD}>{RESET} {fullpath}{article.get_filename()}")
makedirs(fullpath, exist_ok=True)
with open(fullpath + article.get_filename(), "w") as f:
f.write(article.get_article())
# Store detected unknown characters
if len(article.getUnknownChars()) > 0:
unknownCharsArticles.append(article)
if len(article.get_unknown_chars()) > 0:
unknown_chars_articles.append(article)
for article in unknownCharsArticles:
unknownCharsApparitions: list = article.getUnknownChars()
nb: int = len(unknownCharsApparitions)
for article in unknown_chars_articles:
unknown_chars_apparitions: list = article.get_unknown_chars()
nb: int = len(unknown_chars_apparitions)
s: str = "s" if nb > 1 else ""
print(
f"\n{BOLD}{nb}{RESET} unknown character{s} "
+ f"detected in article {BOLD}{article.id}{RESET}"
+ f"\n{BOLD}·{RESET} "
+ highlightUnknownChars(article.title)
+ highlight_unknown_chars(article.title)
)
for text in unknownCharsApparitions:
print(f" {BOLD}{RESET} " + highlightUnknownChars(text))
for text in unknown_chars_apparitions:
print(f" {BOLD}{RESET} " + highlight_unknown_chars(text))
db.close() # Close the database connection