refactor with inheritance between Article & Section

This commit is contained in:
Guilhem Fauré 2023-05-25 10:30:18 +02:00
parent f67cddd92c
commit 7e3680d282
2 changed files with 79 additions and 106 deletions

View File

@ -24,6 +24,7 @@ class Configuration:
data_dir: str = "data"
clear_output: bool = False
prepend_h1: bool = True
export_filetype: str = "md"
def __init__(self, config_file: Optional[str] = None):
if config_file is not None:

View File

@ -1,5 +1,6 @@
from os.path import basename, splitext
from re import I, compile
from re import I, compile, finditer
from typing import Any
from peewee import ModelSelect
from slugify import slugify
@ -35,27 +36,20 @@ class Document(SpipDocuments):
)
EXPORTTYPE: str = "md"
class Article(SpipArticles):
class Meta:
table_name: str = "spip_articles"
class SpipObject:
id: int
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Common fields that need conversions
self.titre: str = convert(self.titre, True)
self.descriptif: str = convert(self.descriptif, True)
self.texte: str = convert(self.texte) # Convert SPIP to Markdown
self.statut: str = "false" if self.statut == "publie" else "true"
self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true"
self.extra: str = convert(self.extra) # Probably unused
# Article specific
self.surtitre: str = convert(self.surtitre, True) # Probably unused
self.soustitre: str = convert(self.soustitre, True) # Probably unused
self.chapo: str = convert(self.chapo) # Probably unused
self.ps: str = convert(self.ps) # Probably unused
self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false"
# Define file prefix (need to be changed later)
self.prefix = "index"
def documents(self) -> ModelSelect:
documents = (
@ -64,7 +58,7 @@ class Article(SpipArticles):
SpipDocumentsLiens,
on=(Document.id_document == SpipDocumentsLiens.id_document),
)
.where(SpipDocumentsLiens.id_objet == self.id_article)
.where(SpipDocumentsLiens.id_objet == self.id)
)
for d in documents:
self.texte = link_document(self.texte, d.id_document, d.titre, d.slug())
@ -76,11 +70,13 @@ class Article(SpipArticles):
return slugify((self.date + "-" if date else "") + self.titre)
def filename(self) -> str:
return "index" + "." + self.lang + "." + EXPORTTYPE
return self.prefix + "." + self.lang + "." + config.export_filetype
def frontmatter(self) -> str:
return dump(
{
raise NotImplementedError("Subclasses must implement 'frontmatter' method.")
def common_frontmatter(self) -> dict[str, Any]:
return {
"lang": self.lang,
"translationKey": self.id_trad,
"title": self.titre,
@ -89,19 +85,9 @@ class Article(SpipArticles):
"draft": self.statut,
"description": self.descriptif,
# Debugging
"spip_id": self.id_article,
"spip_id_secteur": self.id_secteur,
# Article specific
"summary": self.chapo,
"surtitle": self.surtitre,
"subtitle": self.soustitre,
"date": self.date_redac,
"authors": [author.nom for author in self.authors()],
# Debugging
"spip_id_rubrique": self.id_rubrique,
},
allow_unicode=True,
)
"spip_id": self.id,
}
def body(self) -> str:
body: str = ""
@ -115,21 +101,57 @@ class Article(SpipArticles):
# Same with an "extra" section
if len(self.extra) > 0:
body += "\n\n# EXTRA\n\n" + self.extra
# If there is a caption, add the caption followed by a hr
if hasattr(self, "caption") and len(self.caption) > 0:
body += "\n\n" + self.caption + "\n\n***"
# PS
if hasattr(self, "ps") and len(self.ps) > 0:
body += "\n\n# POST-SCRIPTUM\n\n" + self.ps
# Microblog
if hasattr(self, "microblog") and len(self.microblog) > 0:
body += "\n\n# MICROBLOGGING\n\n" + self.microblog
return body
def content(self) -> str:
# Return the final article text
return "---\n" + self.frontmatter() + "---" + self.body()
class Article(SpipObject, SpipArticles):
class Meta:
table_name: str = "spip_articles"
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# More conversions needed for articles
self.surtitre: str = convert(self.surtitre, True) # Probably unused
self.soustitre: str = convert(self.soustitre, True) # Probably unused
self.chapo: str = convert(self.chapo) # Probably unused
self.ps: str = convert(self.ps) # Probably unused
self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false"
# ID
self.id = self.id_article
def frontmatter(self) -> str:
return dump(
{
**super().common_frontmatter(),
# Article specific
"summary": self.chapo,
"surtitle": self.surtitre,
"subtitle": self.soustitre,
"date": self.date_redac,
"authors": [author.nom for author in self.authors()],
# Debugging
"spip_id_rubrique": self.id_rubrique,
},
allow_unicode=True,
)
def body(self) -> str:
body: str = super().body()
# If there is a caption, add the caption followed by a hr
if len(self.chapo) > 0:
body += "\n\n" + self.chapo + "\n\n***"
# PS
if len(self.ps) > 0:
body += "\n\n# POST-SCRIPTUM\n\n" + self.ps
# Microblog
if len(self.microblog) > 0:
body += "\n\n# MICROBLOGGING\n\n" + self.microblog
return body
def authors(self) -> list[SpipAuteurs]:
return (
SpipAuteurs.select()
@ -151,11 +173,8 @@ def get_articles(section_id: int, limit: int = 10**6) -> ModelSelect:
)
ARTICLE_LINK = compile(r"\[(.*?)]\((?:art|article)([0-9]+)\)", I)
def link_articles(text: str):
for match in ARTICLE_LINK.finditer(text):
for match in finditer(r"\[(.*?)]\((?:art|article)([0-9]+)\)", text):
article = Article.get(Article.id_article == match.group(2))
if len(match.group(1)) > 0:
title: str = match.group(1)
@ -167,75 +186,28 @@ def link_articles(text: str):
return text
class Rubrique(SpipRubriques):
class Rubrique(SpipObject, SpipRubriques):
class Meta:
table_name: str = "spip_rubriques"
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.titre: str = convert(self.titre, True)
self.descriptif: str = convert(self.descriptif, True)
self.texte: str = convert(self.texte) # Convert SPIP to Markdown
self.statut: str = "false" if self.statut == "publie" else "true"
self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true"
self.extra: str = convert(self.extra) # Probably unused
def documents(self) -> ModelSelect:
documents = (
Document.select()
.join(
SpipDocumentsLiens,
on=(Document.id_document == SpipDocumentsLiens.id_document),
)
.where(SpipDocumentsLiens.id_objet == self.id_rubrique)
)
for d in documents:
self.texte = link_document(self.texte, d.id_document, d.titre, d.slug())
# Internal (articles) links
self.texte = link_articles(self.texte)
return documents
def slug(self, date: bool = False) -> str:
return slugify((self.date + "-" if date else "") + self.titre)
def filename(self) -> str:
return "_index" + "." + self.lang + "." + EXPORTTYPE
# ID
self.id = self.id_rubrique
# File prefix
self.prefix = "_index"
def frontmatter(self) -> str:
return dump(
{
"lang": self.lang,
"translationKey": self.id_trad,
"title": self.titre,
"publishDate": self.date,
"lastmod": self.maj,
"draft": self.statut,
"description": self.descriptif,
**super().common_frontmatter(),
# Debugging
"spip_id": self.id_rubrique,
"spip_id_secteur": self.id_secteur,
"spip_id_parent": self.id_parent,
"spip_profondeur": self.profondeur,
},
allow_unicode=True,
)
def body(self) -> str:
body: str = ""
# Add the title as a Markdown h1
if len(self.titre) > 0 and config.prepend_h1:
body += "\n\n# " + self.titre
# If there is a text, add the text preceded by two line breaks
if len(self.texte) > 0:
# Remove remaining HTML after & append to body
body += "\n\n" + self.texte
# Same with an "extra" section
if len(self.extra) > 0:
body += "\n\n# EXTRA\n\n" + self.extra
return body
def content(self) -> str:
# Return the final article text
return "---\n" + self.frontmatter() + "---" + self.body()
# Query the DB to retrieve all sections sorted by publication date
def get_sections(limit: int = 10**6) -> ModelSelect: