PEP8 refactor

This commit is contained in:
Guilhem Fauré 2023-05-16 15:55:51 +02:00
parent aa1b822688
commit e1c8bd4b2e
5 changed files with 98 additions and 82 deletions

View File

@ -1,23 +1,25 @@
# pyright: basic # pyright: basic
from re import finditer from re import finditer
from converter import convertBody, convertMeta, unknownIso
from database import *
from slugify import slugify from slugify import slugify
# from yaml import CDumper as Dumper
from yaml import dump from yaml import dump
from converter import convert_body, convert_meta, unknown_iso
from database import SpipArticles, SpipAuteurs, SpipAuteursLiens, SpipRubriques
# from yaml import CDumper as Dumper
class Article: class Article:
def __init__(self, article): def __init__(self, article):
self.id: int = article.id_article self.id: int = article.id_article
# self.surtitle = article.surtitre # Probably unused # self.surtitle = article.surtitre # Probably unused
self.title: str = convertMeta(article.titre) self.title: str = convert_meta(article.titre)
self.subtitle: str = article.soustitre # Probably unused self.subtitle: str = article.soustitre # Probably unused
self.section_id: int = article.id_rubrique self.section_id: int = article.id_rubrique
self.description: str = convertMeta(article.descriptif) self.description: str = convert_meta(article.descriptif)
self.caption: str = article.chapo # Probably unused self.caption: str = article.chapo # Probably unused
self.text: str = convertBody(article.texte) # Markdown self.text: str = convert_body(article.texte) # Markdown
self.ps: str = article.ps # Probably unused self.ps: str = article.ps # Probably unused
self.publicationDate: str = article.date self.publicationDate: str = article.date
self.draft: bool = False if article.statut == "publie" else True self.draft: bool = False if article.statut == "publie" else True
@ -39,22 +41,22 @@ class Article:
self.virtual: str = article.virtuel # TODO Why? self.virtual: str = article.virtuel # TODO Why?
self.microblog: str = article.microblog # Probably unused self.microblog: str = article.microblog # Probably unused
def getSection(self) -> str: def get_section(self) -> str:
return convertMeta( return convert_meta(
SpipRubriques.select() SpipRubriques.select()
.where(SpipRubriques.id_rubrique == self.section_id)[0] .where(SpipRubriques.id_rubrique == self.section_id)[0]
.titre .titre
) )
def getPath(self) -> str: def get_path(self) -> str:
return ( return (
slugify(self.getSection()) + "/" + slugify(f"{self.id}-{self.title}") + "/" slugify(self.get_section()) + "/" + slugify(f"{self.id}-{self.title}") + "/"
) )
def getFilename(self) -> str: def get_filename(self) -> str:
return "index.fr.md" return "index.fr.md"
def getAuthors(self) -> tuple: def get_authors(self) -> tuple:
return ( return (
SpipAuteurs.select() SpipAuteurs.select()
.join( .join(
@ -64,7 +66,7 @@ class Article:
.where(SpipAuteursLiens.id_objet == self.id) .where(SpipAuteursLiens.id_objet == self.id)
) )
def getFrontmatter(self) -> str: def get_frontmatter(self) -> str:
return dump( return dump(
{ {
"lang": self.lang, "lang": self.lang,
@ -75,14 +77,14 @@ class Article:
"lastmod": self.update, "lastmod": self.update,
"draft": self.draft, "draft": self.draft,
"description": self.description, "description": self.description,
"authors": [author.nom for author in self.getAuthors()], "authors": [author.nom for author in self.get_authors()],
}, },
allow_unicode=True, allow_unicode=True,
) )
def getArticle(self) -> str: def get_article(self) -> str:
# Build the final article text # Build the final article text
article: str = "---\n" + self.getFrontmatter() + "---" article: str = "---\n" + self.get_frontmatter() + "---"
# If there is a caption, add the caption followed by a hr # If there is a caption, add the caption followed by a hr
if len(self.caption) > 0: if len(self.caption) > 0:
article += "\n\n" + self.caption + "\n\n***" article += "\n\n" + self.caption + "\n\n***"
@ -90,7 +92,7 @@ class Article:
if len(self.text) > 0: if len(self.text) > 0:
article += "\n\n" + self.text article += "\n\n" + self.text
# Same with an "extra" section # Same with an "extra" section
if self.extra != None and len(self.extra) > 0: if self.extra is not None and len(self.extra) > 0:
article += "\n\n# EXTRA\n\n" + self.extra article += "\n\n# EXTRA\n\n" + self.extra
# PS # PS
if len(self.ps) > 0: if len(self.ps) > 0:
@ -100,10 +102,10 @@ class Article:
article += "\n\n# MICROBLOGGING\n\n" + self.microblog article += "\n\n# MICROBLOGGING\n\n" + self.microblog
return article return article
def getUnknownChars(self) -> list[str]: def get_unknown_chars(self) -> list[str]:
errors: list[str] = [] errors: list[str] = []
for text in (self.title, self.text): for text in (self.title, self.text):
for char in unknownIso: for char in unknown_iso:
for match in finditer(char + r".*(?=\r?\n|$)", text): for match in finditer(char + r".*(?=\r?\n|$)", text):
errors.append(match.group()) errors.append(match.group())
return errors return errors
@ -112,10 +114,10 @@ class Article:
class Articles: class Articles:
exported: int = 0 exported: int = 0
def __init__(self, maxToExport: int) -> None: def __init__(self, maxexport: int) -> None:
# Query the DB to retrieve all articles sorted by publication date # Query the DB to retrieve all articles sorted by publication date
self.articles = ( self.articles = (
SpipArticles.select().order_by(SpipArticles.date.desc()).limit(maxToExport) SpipArticles.select().order_by(SpipArticles.date.desc()).limit(maxexport)
) )
self.toExport: int = len(self.articles) self.toExport: int = len(self.articles)

View File

@ -4,36 +4,36 @@ from os.path import isfile
from yaml import CLoader as Loader from yaml import CLoader as Loader
from yaml import load from yaml import load
configPaths = ("spip2md.yml", "spip2md.yaml") config_paths = ("spip2md.yml", "spip2md.yaml")
class Configuration: class Configuration:
db = "spip" db = "spip"
dbHost = "localhost" db_host = "localhost"
dbUser = "spip" db_user = "spip"
dbPass = "password" db_pass = "password"
outputDir = "output" output_dir = "output"
defaultNbToExport = 1000 default_export_nb = 1000
def __init__(self, configFile: str | None = None) -> None: def __init__(self, config_file: str | None = None) -> None:
if configFile != None: if config_file is not None:
with open(configFile) as f: with open(config_file) as f:
config = load(f.read(), Loader=Loader) config = load(f.read(), Loader=Loader)
if "db" in config: if "db" in config:
self.db = config["db"] self.db = config["db"]
if "dbUser" in config: if "db_user" in config:
self.dbUser = config["dbUser"] self.db_user = config["db_user"]
if "dbPass" in config: if "db_pass" in config:
self.dbPass = config["dbPass"] self.db_pass = config["db_pass"]
if "outputDir" in config: if "output_dir" in config:
self.outputDir = config["outputDir"] self.output_dir = config["output_dir"]
if "defaultNbToExport" in config: if "default_export_nb" in config:
self.defaultNbToExport = config["defaultNbToExport"] self.default_export_nb = config["default_export_nb"]
config = Configuration() config = Configuration()
for path in configPaths: for path in config_paths:
if isfile(path): if isfile(path):
config = Configuration(path) config = Configuration(path)
break break

View File

@ -2,7 +2,7 @@
from re import I, S, compile, finditer from re import I, S, compile, finditer
# SPIP syntax to Markdown # SPIP syntax to Markdown
spipToMarkdown = ( spip_to_markdown = (
( # horizontal rule ( # horizontal rule
compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", S | I), compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", S | I),
# r"---", # r"---",
@ -114,7 +114,7 @@ spipToMarkdown = (
), ),
) )
spipToText = ( spip_to_text = (
( # strong ( # strong
compile(r"\{\{ *(.*?) *\}\}", S | I), compile(r"\{\{ *(.*?) *\}\}", S | I),
r"\1", r"\1",
@ -159,7 +159,7 @@ spipToText = (
), ),
) )
isoToUtf = ( iso_to_utf = (
# Broken encoding # Broken encoding
( # Fix UTF-8 apostrophe that was interpreted as ISO 8859-1 ( # Fix UTF-8 apostrophe that was interpreted as ISO 8859-1
"’", "’",
@ -253,44 +253,47 @@ isoToUtf = (
) )
## WARNING unknown broken encoding ## WARNING unknown broken encoding
unknownIso = ( unknown_iso = (
r"
", # unknown 
 r"
", # unknown 

r"∆", # unknown â^† r"∆", # unknown â^†
) )
# Define terminal escape sequences to stylize output, regex escaped
RED: str = "\033[91m"
BOLD: str = "\033[1m"
RESET: str = "\033[0m"
def convertBody(text: str) -> str:
for spip, markdown in spipToMarkdown: def convert_body(text: str) -> str:
for spip, markdown in spip_to_markdown:
text = spip.sub(markdown, text) text = spip.sub(markdown, text)
for iso, utf in isoToUtf: for iso, utf in iso_to_utf:
text.replace(iso, utf) text.replace(iso, utf)
return text return text
def convertMeta(text: str) -> str: def convert_meta(text: str) -> str:
for spip, metadata in spipToText: for spip, metadata in spip_to_text:
text = spip.sub(metadata, text) text = spip.sub(metadata, text)
for iso, utf in isoToUtf: for iso, utf in iso_to_utf:
text.replace(iso, utf) text.replace(iso, utf)
return text return text
def removeUnknownChars(text: str) -> str: def remove_unknown_chars(text: str) -> str:
for char in unknownIso: for char in unknown_iso:
text.replace(char, "") text.replace(char, "")
return text return text
def highlightUnknownChars(text: str) -> str: def highlight_unknown_chars(text: str) -> str:
# Define terminal escape sequences to stylize output, regex escaped
COLOR: str = "\033[91m" + "\033[1m" # Red + Bold
RESET: str = "\033[0m"
# Highlight in COLOR unknown chars in text # Highlight in COLOR unknown chars in text
for char in unknownIso: for char in unknown_iso:
for match in finditer(char, text): for match in finditer(char, text):
text = ( text = (
text[: match.start()] text[: match.start()]
+ COLOR + RED
+ BOLD
+ match.group() + match.group()
+ RESET + RESET
+ text[match.end() :] + text[match.end() :]

View File

@ -1,7 +1,18 @@
# pyright: basic # pyright: basic
from peewee import (SQL, BigAutoField, BigIntegerField, CharField, from peewee import (
CompositeKey, DateField, DateTimeField, FloatField, SQL,
IntegerField, Model, MySQLDatabase, TextField) BigAutoField,
BigIntegerField,
CharField,
CompositeKey,
DateField,
DateTimeField,
FloatField,
IntegerField,
Model,
MySQLDatabase,
TextField,
)
# class UnknownField(object): # class UnknownField(object):
# def __init__(self, *_, **__): # def __init__(self, *_, **__):

View File

@ -2,7 +2,7 @@
# pyright: basic # pyright: basic
from articles import Article, Articles from articles import Article, Articles
from config import config from config import config
from converter import highlightUnknownChars from converter import highlight_unknown_chars
from database import db from database import db
if __name__ != "__main__": if __name__ != "__main__":
@ -13,18 +13,18 @@ from os import makedirs, mkdir
from shutil import rmtree from shutil import rmtree
# Clean the output dir & create a new one # Clean the output dir & create a new one
rmtree(config.outputDir, True) rmtree(config.output_dir, True)
mkdir(config.outputDir) mkdir(config.output_dir)
# Connect to the MySQL database with Peewee ORM # Connect to the MySQL database with Peewee ORM
db.init(config.db, host=config.dbHost, user=config.dbUser, password=config.dbPass) db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
db.connect() db.connect()
# Define max nb of articles to export based on first CLI param # Define max nb of articles to export based on first CLI param
if len(sys.argv) > 1: if len(sys.argv) > 1:
maxToExport = int(sys.argv[1]) maxexport = int(sys.argv[1])
else: else:
maxToExport = config.defaultNbToExport maxexport = config.default_export_nb
# Define terminal escape sequences to stylize output # Define terminal escape sequences to stylize output
R: str = "\033[91m" R: str = "\033[91m"
@ -34,38 +34,38 @@ BOLD: str = "\033[1m"
RESET: str = "\033[0m" RESET: str = "\033[0m"
# Articles that contain unknown chars # Articles that contain unknown chars
unknownCharsArticles: list[Article] = [] unknown_chars_articles: list[Article] = []
# Loop among first maxToExport articles & export them # Loop among first maxToExport articles & export them
for counter, article in Articles(maxToExport): for counter, article in Articles(maxexport):
if (counter["exported"] - 1) % 100 == 0: if (counter["exported"] - 1) % 100 == 0:
print( print(
f"\n{BOLD}Exporting {R}{counter['remaining']+1}{RESET}" f"\n{BOLD}Exporting {R}{counter['remaining']+1}{RESET}"
+ f"{BOLD} SPIP articles to Markdown & YAML files{RESET}\n" + f"{BOLD} SPIP articles to Markdown & YAML files{RESET}\n"
) )
print( print(
f"{BOLD}{counter['exported']}.{RESET} " + highlightUnknownChars(article.title) f"{BOLD}{counter['exported']}.{RESET} " + highlight_unknown_chars(article.title)
) )
fullPath: str = config.outputDir + "/" + article.getPath() fullpath: str = config.output_dir + "/" + article.get_path()
print(f"{BOLD}>{RESET} {fullPath}{article.getFilename()}") print(f"{BOLD}>{RESET} {fullpath}{article.get_filename()}")
makedirs(fullPath, exist_ok=True) makedirs(fullpath, exist_ok=True)
with open(fullPath + article.getFilename(), "w") as f: with open(fullpath + article.get_filename(), "w") as f:
f.write(article.getArticle()) f.write(article.get_article())
# Store detected unknown characters # Store detected unknown characters
if len(article.getUnknownChars()) > 0: if len(article.get_unknown_chars()) > 0:
unknownCharsArticles.append(article) unknown_chars_articles.append(article)
for article in unknownCharsArticles: for article in unknown_chars_articles:
unknownCharsApparitions: list = article.getUnknownChars() unknown_chars_apparitions: list = article.get_unknown_chars()
nb: int = len(unknownCharsApparitions) nb: int = len(unknown_chars_apparitions)
s: str = "s" if nb > 1 else "" s: str = "s" if nb > 1 else ""
print( print(
f"\n{BOLD}{nb}{RESET} unknown character{s} " f"\n{BOLD}{nb}{RESET} unknown character{s} "
+ f"detected in article {BOLD}{article.id}{RESET}" + f"detected in article {BOLD}{article.id}{RESET}"
+ f"\n{BOLD}·{RESET} " + f"\n{BOLD}·{RESET} "
+ highlightUnknownChars(article.title) + highlight_unknown_chars(article.title)
) )
for text in unknownCharsApparitions: for text in unknown_chars_apparitions:
print(f" {BOLD}{RESET} " + highlightUnknownChars(text)) print(f" {BOLD}{RESET} " + highlight_unknown_chars(text))
db.close() # Close the database connection db.close() # Close the database connection