From 7e3680d282999fb6a141203f94c07d74e50b48fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilhem=20Faur=C3=A9?= Date: Thu, 25 May 2023 10:30:18 +0200 Subject: [PATCH] refactor with inheritance between Article & Section --- spip2md/config.py | 1 + spip2md/spipobjects.py | 184 +++++++++++++++++------------------------ 2 files changed, 79 insertions(+), 106 deletions(-) diff --git a/spip2md/config.py b/spip2md/config.py index d84af5d..c5ad8f9 100644 --- a/spip2md/config.py +++ b/spip2md/config.py @@ -24,6 +24,7 @@ class Configuration: data_dir: str = "data" clear_output: bool = False prepend_h1: bool = True + export_filetype: str = "md" def __init__(self, config_file: Optional[str] = None): if config_file is not None: diff --git a/spip2md/spipobjects.py b/spip2md/spipobjects.py index b73e528..eb2093e 100644 --- a/spip2md/spipobjects.py +++ b/spip2md/spipobjects.py @@ -1,5 +1,6 @@ from os.path import basename, splitext -from re import I, compile +from re import I, compile, finditer +from typing import Any from peewee import ModelSelect from slugify import slugify @@ -35,27 +36,20 @@ class Document(SpipDocuments): ) -EXPORTTYPE: str = "md" - - -class Article(SpipArticles): - class Meta: - table_name: str = "spip_articles" +class SpipObject: + id: int def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + # Common fields that need conversions self.titre: str = convert(self.titre, True) self.descriptif: str = convert(self.descriptif, True) self.texte: str = convert(self.texte) # Convert SPIP to Markdown self.statut: str = "false" if self.statut == "publie" else "true" self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true" self.extra: str = convert(self.extra) # Probably unused - # Article specific - self.surtitre: str = convert(self.surtitre, True) # Probably unused - self.soustitre: str = convert(self.soustitre, True) # Probably unused - self.chapo: str = convert(self.chapo) # Probably unused - self.ps: str = convert(self.ps) # Probably unused - self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false" + # Define file prefix (need to be changed later) + self.prefix = "index" def documents(self) -> ModelSelect: documents = ( @@ -64,7 +58,7 @@ class Article(SpipArticles): SpipDocumentsLiens, on=(Document.id_document == SpipDocumentsLiens.id_document), ) - .where(SpipDocumentsLiens.id_objet == self.id_article) + .where(SpipDocumentsLiens.id_objet == self.id) ) for d in documents: self.texte = link_document(self.texte, d.id_document, d.titre, d.slug()) @@ -76,32 +70,24 @@ class Article(SpipArticles): return slugify((self.date + "-" if date else "") + self.titre) def filename(self) -> str: - return "index" + "." + self.lang + "." + EXPORTTYPE + return self.prefix + "." + self.lang + "." + config.export_filetype def frontmatter(self) -> str: - return dump( - { - "lang": self.lang, - "translationKey": self.id_trad, - "title": self.titre, - "publishDate": self.date, - "lastmod": self.maj, - "draft": self.statut, - "description": self.descriptif, - # Debugging - "spip_id": self.id_article, - "spip_id_secteur": self.id_secteur, - # Article specific - "summary": self.chapo, - "surtitle": self.surtitre, - "subtitle": self.soustitre, - "date": self.date_redac, - "authors": [author.nom for author in self.authors()], - # Debugging - "spip_id_rubrique": self.id_rubrique, - }, - allow_unicode=True, - ) + raise NotImplementedError("Subclasses must implement 'frontmatter' method.") + + def common_frontmatter(self) -> dict[str, Any]: + return { + "lang": self.lang, + "translationKey": self.id_trad, + "title": self.titre, + "publishDate": self.date, + "lastmod": self.maj, + "draft": self.statut, + "description": self.descriptif, + # Debugging + "spip_id_secteur": self.id_secteur, + "spip_id": self.id, + } def body(self) -> str: body: str = "" @@ -115,21 +101,57 @@ class Article(SpipArticles): # Same with an "extra" section if len(self.extra) > 0: body += "\n\n# EXTRA\n\n" + self.extra - # If there is a caption, add the caption followed by a hr - if hasattr(self, "caption") and len(self.caption) > 0: - body += "\n\n" + self.caption + "\n\n***" - # PS - if hasattr(self, "ps") and len(self.ps) > 0: - body += "\n\n# POST-SCRIPTUM\n\n" + self.ps - # Microblog - if hasattr(self, "microblog") and len(self.microblog) > 0: - body += "\n\n# MICROBLOGGING\n\n" + self.microblog return body def content(self) -> str: # Return the final article text return "---\n" + self.frontmatter() + "---" + self.body() + +class Article(SpipObject, SpipArticles): + class Meta: + table_name: str = "spip_articles" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # More conversions needed for articles + self.surtitre: str = convert(self.surtitre, True) # Probably unused + self.soustitre: str = convert(self.soustitre, True) # Probably unused + self.chapo: str = convert(self.chapo) # Probably unused + self.ps: str = convert(self.ps) # Probably unused + self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false" + # ID + self.id = self.id_article + + def frontmatter(self) -> str: + return dump( + { + **super().common_frontmatter(), + # Article specific + "summary": self.chapo, + "surtitle": self.surtitre, + "subtitle": self.soustitre, + "date": self.date_redac, + "authors": [author.nom for author in self.authors()], + # Debugging + "spip_id_rubrique": self.id_rubrique, + }, + allow_unicode=True, + ) + + def body(self) -> str: + body: str = super().body() + # If there is a caption, add the caption followed by a hr + if len(self.chapo) > 0: + body += "\n\n" + self.chapo + "\n\n***" + # PS + if len(self.ps) > 0: + body += "\n\n# POST-SCRIPTUM\n\n" + self.ps + # Microblog + if len(self.microblog) > 0: + body += "\n\n# MICROBLOGGING\n\n" + self.microblog + return body + def authors(self) -> list[SpipAuteurs]: return ( SpipAuteurs.select() @@ -151,11 +173,8 @@ def get_articles(section_id: int, limit: int = 10**6) -> ModelSelect: ) -ARTICLE_LINK = compile(r"\[(.*?)]\((?:art|article)([0-9]+)\)", I) - - def link_articles(text: str): - for match in ARTICLE_LINK.finditer(text): + for match in finditer(r"\[(.*?)]\((?:art|article)([0-9]+)\)", text): article = Article.get(Article.id_article == match.group(2)) if len(match.group(1)) > 0: title: str = match.group(1) @@ -167,75 +186,28 @@ def link_articles(text: str): return text -class Rubrique(SpipRubriques): +class Rubrique(SpipObject, SpipRubriques): class Meta: table_name: str = "spip_rubriques" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.titre: str = convert(self.titre, True) - self.descriptif: str = convert(self.descriptif, True) - self.texte: str = convert(self.texte) # Convert SPIP to Markdown - self.statut: str = "false" if self.statut == "publie" else "true" - self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true" - self.extra: str = convert(self.extra) # Probably unused - - def documents(self) -> ModelSelect: - documents = ( - Document.select() - .join( - SpipDocumentsLiens, - on=(Document.id_document == SpipDocumentsLiens.id_document), - ) - .where(SpipDocumentsLiens.id_objet == self.id_rubrique) - ) - for d in documents: - self.texte = link_document(self.texte, d.id_document, d.titre, d.slug()) - # Internal (articles) links - self.texte = link_articles(self.texte) - return documents - - def slug(self, date: bool = False) -> str: - return slugify((self.date + "-" if date else "") + self.titre) - - def filename(self) -> str: - return "_index" + "." + self.lang + "." + EXPORTTYPE + # ID + self.id = self.id_rubrique + # File prefix + self.prefix = "_index" def frontmatter(self) -> str: return dump( { - "lang": self.lang, - "translationKey": self.id_trad, - "title": self.titre, - "publishDate": self.date, - "lastmod": self.maj, - "draft": self.statut, - "description": self.descriptif, + **super().common_frontmatter(), # Debugging - "spip_id": self.id_rubrique, - "spip_id_secteur": self.id_secteur, + "spip_id_parent": self.id_parent, + "spip_profondeur": self.profondeur, }, allow_unicode=True, ) - def body(self) -> str: - body: str = "" - # Add the title as a Markdown h1 - if len(self.titre) > 0 and config.prepend_h1: - body += "\n\n# " + self.titre - # If there is a text, add the text preceded by two line breaks - if len(self.texte) > 0: - # Remove remaining HTML after & append to body - body += "\n\n" + self.texte - # Same with an "extra" section - if len(self.extra) > 0: - body += "\n\n# EXTRA\n\n" + self.extra - return body - - def content(self) -> str: - # Return the final article text - return "---\n" + self.frontmatter() + "---" + self.body() - # Query the DB to retrieve all sections sorted by publication date def get_sections(limit: int = 10**6) -> ModelSelect: