refactor with inheritance between Article & Section

This commit is contained in:
Guilhem Fauré 2023-05-25 10:30:18 +02:00
parent f67cddd92c
commit 7e3680d282
2 changed files with 79 additions and 106 deletions

View File

@ -24,6 +24,7 @@ class Configuration:
data_dir: str = "data" data_dir: str = "data"
clear_output: bool = False clear_output: bool = False
prepend_h1: bool = True prepend_h1: bool = True
export_filetype: str = "md"
def __init__(self, config_file: Optional[str] = None): def __init__(self, config_file: Optional[str] = None):
if config_file is not None: if config_file is not None:

View File

@ -1,5 +1,6 @@
from os.path import basename, splitext from os.path import basename, splitext
from re import I, compile from re import I, compile, finditer
from typing import Any
from peewee import ModelSelect from peewee import ModelSelect
from slugify import slugify from slugify import slugify
@ -35,27 +36,20 @@ class Document(SpipDocuments):
) )
EXPORTTYPE: str = "md" class SpipObject:
id: int
class Article(SpipArticles):
class Meta:
table_name: str = "spip_articles"
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
# Common fields that need conversions
self.titre: str = convert(self.titre, True) self.titre: str = convert(self.titre, True)
self.descriptif: str = convert(self.descriptif, True) self.descriptif: str = convert(self.descriptif, True)
self.texte: str = convert(self.texte) # Convert SPIP to Markdown self.texte: str = convert(self.texte) # Convert SPIP to Markdown
self.statut: str = "false" if self.statut == "publie" else "true" self.statut: str = "false" if self.statut == "publie" else "true"
self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true" self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true"
self.extra: str = convert(self.extra) # Probably unused self.extra: str = convert(self.extra) # Probably unused
# Article specific # Define file prefix (need to be changed later)
self.surtitre: str = convert(self.surtitre, True) # Probably unused self.prefix = "index"
self.soustitre: str = convert(self.soustitre, True) # Probably unused
self.chapo: str = convert(self.chapo) # Probably unused
self.ps: str = convert(self.ps) # Probably unused
self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false"
def documents(self) -> ModelSelect: def documents(self) -> ModelSelect:
documents = ( documents = (
@ -64,7 +58,7 @@ class Article(SpipArticles):
SpipDocumentsLiens, SpipDocumentsLiens,
on=(Document.id_document == SpipDocumentsLiens.id_document), on=(Document.id_document == SpipDocumentsLiens.id_document),
) )
.where(SpipDocumentsLiens.id_objet == self.id_article) .where(SpipDocumentsLiens.id_objet == self.id)
) )
for d in documents: for d in documents:
self.texte = link_document(self.texte, d.id_document, d.titre, d.slug()) self.texte = link_document(self.texte, d.id_document, d.titre, d.slug())
@ -76,11 +70,13 @@ class Article(SpipArticles):
return slugify((self.date + "-" if date else "") + self.titre) return slugify((self.date + "-" if date else "") + self.titre)
def filename(self) -> str: def filename(self) -> str:
return "index" + "." + self.lang + "." + EXPORTTYPE return self.prefix + "." + self.lang + "." + config.export_filetype
def frontmatter(self) -> str: def frontmatter(self) -> str:
return dump( raise NotImplementedError("Subclasses must implement 'frontmatter' method.")
{
def common_frontmatter(self) -> dict[str, Any]:
return {
"lang": self.lang, "lang": self.lang,
"translationKey": self.id_trad, "translationKey": self.id_trad,
"title": self.titre, "title": self.titre,
@ -89,19 +85,9 @@ class Article(SpipArticles):
"draft": self.statut, "draft": self.statut,
"description": self.descriptif, "description": self.descriptif,
# Debugging # Debugging
"spip_id": self.id_article,
"spip_id_secteur": self.id_secteur, "spip_id_secteur": self.id_secteur,
# Article specific "spip_id": self.id,
"summary": self.chapo, }
"surtitle": self.surtitre,
"subtitle": self.soustitre,
"date": self.date_redac,
"authors": [author.nom for author in self.authors()],
# Debugging
"spip_id_rubrique": self.id_rubrique,
},
allow_unicode=True,
)
def body(self) -> str: def body(self) -> str:
body: str = "" body: str = ""
@ -115,21 +101,57 @@ class Article(SpipArticles):
# Same with an "extra" section # Same with an "extra" section
if len(self.extra) > 0: if len(self.extra) > 0:
body += "\n\n# EXTRA\n\n" + self.extra body += "\n\n# EXTRA\n\n" + self.extra
# If there is a caption, add the caption followed by a hr
if hasattr(self, "caption") and len(self.caption) > 0:
body += "\n\n" + self.caption + "\n\n***"
# PS
if hasattr(self, "ps") and len(self.ps) > 0:
body += "\n\n# POST-SCRIPTUM\n\n" + self.ps
# Microblog
if hasattr(self, "microblog") and len(self.microblog) > 0:
body += "\n\n# MICROBLOGGING\n\n" + self.microblog
return body return body
def content(self) -> str: def content(self) -> str:
# Return the final article text # Return the final article text
return "---\n" + self.frontmatter() + "---" + self.body() return "---\n" + self.frontmatter() + "---" + self.body()
class Article(SpipObject, SpipArticles):
class Meta:
table_name: str = "spip_articles"
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# More conversions needed for articles
self.surtitre: str = convert(self.surtitre, True) # Probably unused
self.soustitre: str = convert(self.soustitre, True) # Probably unused
self.chapo: str = convert(self.chapo) # Probably unused
self.ps: str = convert(self.ps) # Probably unused
self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false"
# ID
self.id = self.id_article
def frontmatter(self) -> str:
return dump(
{
**super().common_frontmatter(),
# Article specific
"summary": self.chapo,
"surtitle": self.surtitre,
"subtitle": self.soustitre,
"date": self.date_redac,
"authors": [author.nom for author in self.authors()],
# Debugging
"spip_id_rubrique": self.id_rubrique,
},
allow_unicode=True,
)
def body(self) -> str:
body: str = super().body()
# If there is a caption, add the caption followed by a hr
if len(self.chapo) > 0:
body += "\n\n" + self.chapo + "\n\n***"
# PS
if len(self.ps) > 0:
body += "\n\n# POST-SCRIPTUM\n\n" + self.ps
# Microblog
if len(self.microblog) > 0:
body += "\n\n# MICROBLOGGING\n\n" + self.microblog
return body
def authors(self) -> list[SpipAuteurs]: def authors(self) -> list[SpipAuteurs]:
return ( return (
SpipAuteurs.select() SpipAuteurs.select()
@ -151,11 +173,8 @@ def get_articles(section_id: int, limit: int = 10**6) -> ModelSelect:
) )
ARTICLE_LINK = compile(r"\[(.*?)]\((?:art|article)([0-9]+)\)", I)
def link_articles(text: str): def link_articles(text: str):
for match in ARTICLE_LINK.finditer(text): for match in finditer(r"\[(.*?)]\((?:art|article)([0-9]+)\)", text):
article = Article.get(Article.id_article == match.group(2)) article = Article.get(Article.id_article == match.group(2))
if len(match.group(1)) > 0: if len(match.group(1)) > 0:
title: str = match.group(1) title: str = match.group(1)
@ -167,75 +186,28 @@ def link_articles(text: str):
return text return text
class Rubrique(SpipRubriques): class Rubrique(SpipObject, SpipRubriques):
class Meta: class Meta:
table_name: str = "spip_rubriques" table_name: str = "spip_rubriques"
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.titre: str = convert(self.titre, True) # ID
self.descriptif: str = convert(self.descriptif, True) self.id = self.id_rubrique
self.texte: str = convert(self.texte) # Convert SPIP to Markdown # File prefix
self.statut: str = "false" if self.statut == "publie" else "true" self.prefix = "_index"
self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true"
self.extra: str = convert(self.extra) # Probably unused
def documents(self) -> ModelSelect:
documents = (
Document.select()
.join(
SpipDocumentsLiens,
on=(Document.id_document == SpipDocumentsLiens.id_document),
)
.where(SpipDocumentsLiens.id_objet == self.id_rubrique)
)
for d in documents:
self.texte = link_document(self.texte, d.id_document, d.titre, d.slug())
# Internal (articles) links
self.texte = link_articles(self.texte)
return documents
def slug(self, date: bool = False) -> str:
return slugify((self.date + "-" if date else "") + self.titre)
def filename(self) -> str:
return "_index" + "." + self.lang + "." + EXPORTTYPE
def frontmatter(self) -> str: def frontmatter(self) -> str:
return dump( return dump(
{ {
"lang": self.lang, **super().common_frontmatter(),
"translationKey": self.id_trad,
"title": self.titre,
"publishDate": self.date,
"lastmod": self.maj,
"draft": self.statut,
"description": self.descriptif,
# Debugging # Debugging
"spip_id": self.id_rubrique, "spip_id_parent": self.id_parent,
"spip_id_secteur": self.id_secteur, "spip_profondeur": self.profondeur,
}, },
allow_unicode=True, allow_unicode=True,
) )
def body(self) -> str:
body: str = ""
# Add the title as a Markdown h1
if len(self.titre) > 0 and config.prepend_h1:
body += "\n\n# " + self.titre
# If there is a text, add the text preceded by two line breaks
if len(self.texte) > 0:
# Remove remaining HTML after & append to body
body += "\n\n" + self.texte
# Same with an "extra" section
if len(self.extra) > 0:
body += "\n\n# EXTRA\n\n" + self.extra
return body
def content(self) -> str:
# Return the final article text
return "---\n" + self.frontmatter() + "---" + self.body()
# Query the DB to retrieve all sections sorted by publication date # Query the DB to retrieve all sections sorted by publication date
def get_sections(limit: int = 10**6) -> ModelSelect: def get_sections(limit: int = 10**6) -> ModelSelect: