From 13fa720562f0c08e1917498254a31575ebeaea38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilhem=20Faur=C3=A9?= Date: Wed, 24 May 2023 10:43:39 +0200 Subject: [PATCH] refactor to use Peewee objects extension in place of redefining every SPIP atribute --- spip2md/{converter.py => converters.py} | 125 ++++-------- spip2md/database.py | 5 +- spip2md/items.py | 259 ------------------------ spip2md/main.py | 80 ++++---- spip2md/spipobjects.py | 212 +++++++++++++++++++ 5 files changed, 298 insertions(+), 383 deletions(-) rename spip2md/{converter.py => converters.py} (72%) delete mode 100644 spip2md/items.py create mode 100644 spip2md/spipobjects.py diff --git a/spip2md/converter.py b/spip2md/converters.py similarity index 72% rename from spip2md/converter.py rename to spip2md/converters.py index 848a240..d17182a 100644 --- a/spip2md/converter.py +++ b/spip2md/converters.py @@ -3,7 +3,7 @@ from re import I, S, compile, finditer, sub from typing import Optional # SPIP syntax to Markdown -spip_to_markdown = ( +SPIP_TO_MARKDOWN = ( ( # horizontal rule compile(r"- ?- ?- ?- ?[\- ]*|
", S | I), # r"---", @@ -40,6 +40,14 @@ spip_to_markdown = ( ), r"~\1~", ), + ( # images + compile(r"<(img|image)([0-9]+)(\|.*?)*>", S | I), + r"![](\1\2)", + ), + ( # documents & embeds + compile(r"<(doc|document|emb)([0-9]+)(\|.*?)*>", S | I), + r"[](\1\2)", + ), ( # anchor compile(r"\[ *(.*?) *-> *(.*?) *\]", S | I), r"[\1](\2)", @@ -100,58 +108,20 @@ spip_to_markdown = ( ), r"\1", ), -) - -spip_to_text = ( - ( # strong - compile(r"\{\{ *(.*?) *\}\}", S | I), - r"\1", - ), - ( # html strong - compile(r" *(.*?) *", S | I), - r"\1", - ), - ( # emphasis - compile(r"\{ *(.*?) *\}", S | I), - r"\1", - ), - ( # html emphasis - compile(r" *(.*?) *<\/i>", S | I), - r"\1", - ), - ( # strikethrough - compile( - r"\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)", - S | I, - ), - r"\1", - ), - ( # Keep only the first language in multi-language blocks - compile( - r"\s*(?:\[.{2,4}\])?\s*(.*?)\s*(?:\s*\[.{2,4}\].*)*<\/multi>", - S | I, - ), - r"\1", - ), - ( # remove every html tag - compile(r"<\/?.*?> *", S | I), - r"", - ), - ( # Remove beginning with angle bracket(s) - compile(r"^>+ +", S | I), - r"", - ), - ( # Remove beginning with a number followed by a dot - compile(r"^\d+\. +", S | I), + ( # WARNING remove every html tag + compile(r"<\/?.*?>\s*", S | I), r"", ), ) -# HTML tag WARNING can be used to remove them all -html_tag = compile(r"<\/?.*?> *", S | I) +# Further cleaning for metadata texts such as titles or descriptions +SPIP_META_BLOAT = ( + compile(r"^>+ +", S | I), # Remove beginning with angle bracket(s) + compile(r"^\d+\. +", S | I), # Remove beginning with a number followed by a dot +) # Broken ISO encoding to proper UTF-8 -iso_to_utf = ( +ISO_TO_UTF = ( ( # Fix UTF-8 appostrophe that was interpreted as ISO 8859-1 "’", r"’", @@ -264,82 +234,71 @@ iso_to_utf = ( ) # WARNING unknown broken encoding -unknown_iso = ( +UNKNOWN_ISO = ( r"
", r"∆", r"û", ) -# Apply spip_to_markdown conversions to a text -def convert_body(text: Optional[str]) -> str: +# Apply SPIP to Markdown & ISO to UTF conversions to a text, & eventually clean meta +def convert(text: Optional[str], clean_meta: bool = False) -> str: if text is None: return "" - for spip, markdown in spip_to_markdown: + for spip, markdown in SPIP_TO_MARKDOWN: text = spip.sub(markdown, text) - for iso, utf in iso_to_utf: + if clean_meta: + for bloat in SPIP_META_BLOAT: + text = bloat.sub("", text) + for iso, utf in ISO_TO_UTF: text = text.replace(iso, utf) return text -# Apply spip_to_text conversions to a text -def convert_meta(text: Optional[str]) -> str: - if text is None: - return "" - for spip, metadata in spip_to_text: - text = spip.sub(metadata, text) - for iso, utf in iso_to_utf: - text = text.replace(iso, utf) - return text - - -# Replace images & documents in SPIP text with Markdown links with human-readable names -def convert_documents(text: str, documents: list[tuple[int, str, str]]) -> str: +# Replace images & files links in Markdown with real slugs of the actually linked files +def link_documents(text: str, documents: list[tuple[int, str, str]]) -> str: for id, name, slug in documents: + # Replace images that dont have a title written in text text = sub( - r"<(?:img|image)" + str(id) + r"(\|.*?)*>", + r"\[]\((?:img|image)" + str(id) + r"(\|.*?)*\)", f"![{name}]({slug})", text, ) + # Replace images that dont have a title written in text text = sub( - r"<(?:doc|emb)" + str(id) + r"(\|.*?)*>", + r"\[]\((?:doc|document|emb)" + str(id) + r"(\|.*?)*\)", f"[{name}]({slug})", text, ) + # Replace images that already had a title in Markdown style link text = sub( - r"\[(.*?)\]\((?:doc|emb)" + str(id) + r"(\|.*?)*\)", + r"\[(.+?)\]\((?:img|image)" + str(id) + r"(\|.*?)*\)", + f"![\\1]({slug})", + text, + ) + # Replace documents that already had a title in Markdown style link + text = sub( + r"\[(.+?)\]\((?:doc|document|emb)" + str(id) + r"(\|.*?)*\)", f"[\\1]({slug})", text, ) return text -# Replace unknown chars with empty strings (delete them) -def remove_unknown_chars(text: str) -> str: - for char in unknown_iso: - text.replace(char, "") - return text - - -# Replace HTML tags chars with empty strings (delete them) -def remove_tags(text: str) -> str: - return html_tag.sub("", text) - - # Return a list of tuples giving the start and end of unknown substring in text def unknown_chars(text: str) -> list[tuple[int, int]]: positions: list[tuple[int, int]] = [] - for char in unknown_iso: + for char in UNKNOWN_ISO: for match in finditer("(" + char + ")+", text): positions.append((match.start(), match.end())) return positions # Return strings with unknown chards found in text, surrounded by context_length chars -def get_unknown_chars(text: str, context_length: int = 20) -> list[str]: +def unknown_chars_context(text: str, context_length: int = 20) -> list[str]: errors: list[str] = [] context: str = r".{0," + str(context_length) + r"}" - for char in unknown_iso: + for char in UNKNOWN_ISO: matches = finditer( context + r"(?=" + char + r")" + char + r".*?(?=\r?\n|$)", text, diff --git a/spip2md/database.py b/spip2md/database.py index ec9fd47..eb75acb 100644 --- a/spip2md/database.py +++ b/spip2md/database.py @@ -1,4 +1,3 @@ -# pyright: basic # type: ignore from peewee import ( SQL, @@ -15,7 +14,7 @@ from peewee import ( TextField, ) -db = MySQLDatabase(None) +DB = MySQLDatabase(None) # class UnknownField(object): @@ -25,7 +24,7 @@ db = MySQLDatabase(None) class BaseModel(Model): class Meta: - database: MySQLDatabase = db + database: MySQLDatabase = DB class SpipArticles(BaseModel): diff --git a/spip2md/items.py b/spip2md/items.py deleted file mode 100644 index f443db1..0000000 --- a/spip2md/items.py +++ /dev/null @@ -1,259 +0,0 @@ -# pyright: strict -from os.path import basename, splitext -from typing import Any, Optional - -from slugify import slugify -from yaml import dump - -from converter import convert_body, convert_documents, convert_meta, remove_tags -from database import ( - SpipArticles, - SpipAuteurs, - SpipAuteursLiens, - SpipDocuments, - SpipDocumentsLiens, - SpipRubriques, -) - -EXPORTTYPE: str = "md" - - -class Iterator: - items: list[Any] - - def __init__(self) -> None: - # Set the limit at the number of retrieved items - self.LIMIT: int = len(self.items) - # Start before the first element - self.count: int = -1 - - def __iter__(self): - return self - - def __len__(self) -> int: - return self.LIMIT - - def remaining(self) -> int: - return self.LIMIT - self.count - - def __next__(self) -> Any: - self.count += 1 - if self.remaining() <= 0: - raise StopIteration - return self.items[self.count] - - -class Document: - def __init__(self, document: SpipDocuments) -> None: - self.id: int = document.id_document - self.thumbnail_id: int = document.id_vignette - self.title: str = convert_meta(document.titre) - self.date: str = document.date - self.description: str = convert_meta(document.descriptif) - self.file: str = document.fichier - self.draft: bool = document.statut == "publie" - self.creation: str = document.date - self.publication: str = document.date_publication - self.update: str = document.maj - self.media: str = document.media - - def get_slug(self, date: bool = False) -> str: - name_type = splitext(basename(self.file)) - return ( - slugify((self.publication + "-" if date else "") + name_type[0]) - + name_type[1] - ) - - -class Documents(Iterator): - def __init__(self, object_id: int) -> None: - # Query the DB to retrieve all documents related to object of id object_id - items = ( - SpipDocuments.select() - .join( - SpipDocumentsLiens, - on=(SpipDocuments.id_document == SpipDocumentsLiens.id_document), - ) - .where(SpipDocumentsLiens.id_objet == object_id) - ) - self.items: list[Document] = [Document(i) for i in items] - super().__init__() - - -class Item: - id: int - - def __init__(self, item: SpipArticles | SpipRubriques): - self.title: str = convert_meta(item.titre) - self.section_id: int = item.id_rubrique - self.description: str = convert_meta(item.descriptif) - self.text: str = convert_body(item.texte) # Convert SPIP to Markdown - self.publication: str = item.date - self.draft: bool = item.statut == "publie" - self.sector_id: int = item.id_secteur - self.update: str = item.maj - self.lang: str = item.lang - self.set_lang: bool = item.langue_choisie == "oui" # TODO Why ? - self.translation_key: int = item.id_trad - self.extra: str = convert_body(item.extra) # Probably unused - - def get_slug(self, date: bool = False) -> str: - return slugify((self.publication + "-" if date else "") + self.title) - - def get_filename(self) -> str: - return "index" + "." + self.lang + "." + EXPORTTYPE - - def get_frontmatter(self, append: Optional[dict[str, Any]] = None) -> str: - return dump( - { - "lang": self.lang, - "translationKey": self.translation_key, - "title": self.title, - "publishDate": self.publication, - "lastmod": self.update, - "draft": self.draft, - "description": self.description, - # Debugging - "spip_id": self.id, - "spip_id_secteur": self.sector_id, - } - | append - if append is not None - else {}, - allow_unicode=True, - ) - - def get_body(self) -> str: - body: str = "" - # Add the title as a Markdown h1 - if len(self.title) > 0: - body += "\n\n# " + self.title - # If there is a text, add the text preceded by two line breaks - if len(self.text) > 0: - # Convert images & files links - text: str = convert_documents( - self.text, - [(d.id, d.title, d.get_slug()) for d in self.get_documents()], - ) - # Remove remaining HTML after & append to body - body += "\n\n" + remove_tags(text) - # Same with an "extra" section - if len(self.extra) > 0: - body += "\n\n# EXTRA\n\n" + self.extra - return body - - def get_content(self) -> str: - # Return the final article text - return "---\n" + self.get_frontmatter() + "---" + self.get_body() - - def get_documents(self) -> Documents: - return Documents(self.id) - - -class Article(Item): - def __init__(self, article: SpipArticles): - super().__init__(article) - self.id: int = article.id_article - self.surtitle: str = convert_meta(article.surtitre) # Probably unused - self.subtitle: str = convert_meta(article.soustitre) # Probably unused - self.caption: str = convert_body(article.chapo) # Probably unused - self.ps: str = convert_body(article.ps) # Probably unused - self.update_2: str = article.date_modif # Probably unused duplicate of maj - self.creation: str = article.date_redac - self.forum: bool = article.accepter_forum == "oui" # TODO Why ? - self.sitename: str = article.nom_site # Probably useless - self.virtual: str = article.virtuel # TODO Why ? - self.microblog: str = article.microblog # Probably unused - # self.export = article.export # USELESS - # self.views: int = article.visites # USELESS in static - # self.referers: int = article.referers # USELESS in static - # self.popularity: float = article.popularite # USELESS in static - # self.version = article.id_version # USELESS - - def get_authors(self) -> list[SpipAuteurs]: - return ( - SpipAuteurs.select() - .join( - SpipAuteursLiens, - on=(SpipAuteurs.id_auteur == SpipAuteursLiens.id_auteur), - ) - .where(SpipAuteursLiens.id_objet == self.id) - ) - - def get_frontmatter(self, append: Optional[dict[str, Any]] = None) -> str: - return super().get_frontmatter( - { - "surtitle": self.surtitle, - "subtitle": self.subtitle, - "date": self.creation, - "authors": [author.nom for author in self.get_authors()], - # Debugging - "spip_id_rubrique": self.section_id, - "spip_id_secteur": self.sector_id, - "spip_chapo": self.caption, - } - | append - if append is not None - else {}, - ) - - def get_body(self) -> str: - body: str = super().get_body() - # If there is a caption, add the caption followed by a hr - if hasattr(self, "caption") and len(self.caption) > 0: - body += "\n\n" + self.caption + "\n\n***" - # PS - if hasattr(self, "ps") and len(self.ps) > 0: - body += "\n\n# POST-SCRIPTUM\n\n" + self.ps - # Microblog - if hasattr(self, "microblog") and len(self.microblog) > 0: - body += "\n\n# MICROBLOGGING\n\n" + self.microblog - return body - - -class Section(Item): - def __init__(self, section: SpipRubriques): - super().__init__(section) - self.id: int = section.id_rubrique - self.parent_id: int = section.id_parent - self.depth: int = section.profondeur - self.agenda: int = section.agenda - - def get_filename(self) -> str: - return "_" + super().get_filename() - - def get_articles(self, limit: int = 0): - return Articles(self.id, limit) - - -class Articles(Iterator): - def __init__(self, section_id: int, limit: int = 0): - # Query the DB to retrieve all articles sorted by publication date - if limit > 0: - items = ( - SpipArticles.select() - .where(SpipArticles.id_rubrique == section_id) - .order_by(SpipArticles.date.desc()) - .limit(limit) - ) - else: - items = ( - SpipArticles.select() - .where(SpipArticles.id_rubrique == section_id) - .order_by(SpipArticles.date.desc()) - ) - self.items: list[Article] = [Article(i) for i in items] - super().__init__() - - -class Sections(Iterator): - def __init__(self, limit: int = 0): - # Query the DB to retrieve all sections sorted by publication date - if limit > 0: - items = ( - SpipRubriques.select().order_by(SpipRubriques.date.desc()).limit(limit) - ) - else: - items = SpipRubriques.select().order_by(SpipRubriques.date.desc()) - self.items: list[Section] = [Section(i) for i in items] - super().__init__() diff --git a/spip2md/main.py b/spip2md/main.py index 00b73b6..9bd0336 100755 --- a/spip2md/main.py +++ b/spip2md/main.py @@ -1,18 +1,19 @@ #!python -# pyright: strict from os import makedirs from os.path import expanduser from shutil import copyfile, rmtree from sys import argv from config import config -from converter import get_unknown_chars, unknown_chars -from database import db -from items import ( +from converters import unknown_chars, unknown_chars_context +from database import DB +from spipobjects import ( Article, Document, - Section, - Sections, + Rubrique, + get_articles, + get_documents, + get_sections, ) @@ -64,26 +65,27 @@ def indent(nb: int = 1) -> None: # Connect to the MySQL database with Peewee ORM -db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass) -db.connect() +DB.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass) +DB.connect() # Output information about ongoing export & write section to output destination -def write_section(index: int, total: int, section: Section) -> str: +def write_section(index: int, total: int, section: Rubrique) -> str: + color = G # Associate sections to green # Print the name of the exported section & number of remaining sections style(f"{index + 1}. ", BO) - highlight(section.title, *unknown_chars(section.title)) - style(f" {total-index-1}", BO, G) + highlight(section.titre, *unknown_chars(section.titre)) + style(f" {total-index-1}", BO, color) style(f" section{s(total-index)} left") # Define the section’s path (directory) & create directory(ies) if needed - sectiondir: str = config.output_dir + "/" + section.get_slug() + sectiondir: str = config.output_dir + "/" + section.slug() makedirs(sectiondir, exist_ok=True) # Define the section filename & write the index at that filename - sectionpath: str = sectiondir + "/" + section.get_filename() + sectionpath: str = sectiondir + "/" + section.filename() with open(sectionpath, "w") as f: - f.write(section.get_content()) + f.write(section.content()) # Print export location when finished exporting - style(" -> ", BO, G) + style(" -> ", BO, color) print(sectionpath) # Return the first "limit" articles of section return sectiondir @@ -91,30 +93,31 @@ def write_section(index: int, total: int, section: Section) -> str: # Output information about ongoing export & write article to output destination def write_article(index: int, total: int, article: Article, sectiondir: str) -> str: + color = Y # Associate articles to yellow # Print the remaining number of articles to export every 100 articles if index % 100 == 0: indent() print("Exporting", end="") - style(f" {total-index}", BO, Y) + style(f" {total-index}", BO, color) print(" SPIP", end="") style(f" article{s(total-index)}") print(" to Markdown & YAML files") # Print the title of the article being exported style( f" {index + 1}. " - + ("EMPTY " if len(article.text) < 1 else "") + + ("EMPTY " if len(article.texte) < 1 else "") + f"{article.lang} " ) - highlight(article.title, *unknown_chars(article.title)) + highlight(article.titre, *unknown_chars(article.titre)) # Define the full article path & create directory(ies) if needed - articledir: str = sectiondir + "/" + article.get_slug() + articledir: str = sectiondir + "/" + article.slug() makedirs(articledir, exist_ok=True) # Define the article filename & write the article at the filename - articlepath: str = articledir + "/" + article.get_filename() + articlepath: str = articledir + "/" + article.filename() with open(articlepath, "w") as f: - f.write(article.get_content()) + f.write(article.content()) # Print export location when finished exporting - style(" -> ", BO, B) + style(" -> ", BO, color) print(articlepath) return articledir @@ -123,34 +126,35 @@ def write_article(index: int, total: int, article: Article, sectiondir: str) -> def write_document( index: int, total: int, document: Document, objectdir: str, indent_depth: int = 1 ) -> None: + color = B # Associate documents to blue if index % 100 == 0: indent(indent_depth) print("Exporting", end="") - style(f" {total-index}", BO, B) + style(f" {total-index}", BO, color) style(f" document{s(total-index)}\n") # Print the name of the file with a counter indent(indent_depth) style(f"{index + 1}. {document.media} ") - if len(document.title) > 0: - highlight(document.title + " ", *unknown_chars(document.title)) + if len(document.titre) > 0: + highlight(document.titre + " ", *unknown_chars(document.titre)) style("at ") - print(document.file, end="") + print(document.fichier, end="") # Define document path - documentpath: str = expanduser(config.data_dir + "/" + document.file) + documentpath: str = expanduser(config.data_dir + "/" + document.fichier) # Copy the document from it’s SPIP location to the new location try: - copyfile(documentpath, objectdir + "/" + document.get_slug()) + copyfile(documentpath, objectdir + "/" + document.slug()) except FileNotFoundError: style(" -> NOT FOUND!\n", BO, R) else: # Print the outputted file’s path when copied the file - style(" ->", BO, B) - print(f" {objectdir}/{document.get_slug()}") + style(" ->", BO, color) + print(f" {objectdir}/{document.slug()}") # Return true if an article field contains an unknown character def has_unknown_chars(article: Article) -> bool: - if len(get_unknown_chars(article.text)) > 0: + if len(unknown_chars_context(article.texte)) > 0: return True return False @@ -159,13 +163,13 @@ def has_unknown_chars(article: Article) -> bool: def warn_unknown_chars(article: Article) -> None: # Print the title of the article in which there is unknown characters # & the number of them - unknown_chars_apparitions: list[str] = get_unknown_chars(article.text) + unknown_chars_apparitions: list[str] = unknown_chars_context(article.texte) nb: int = len(unknown_chars_apparitions) s: str = "s" if nb > 1 else "" style(f"{nb}") print(f" unknown character{s} in", end="") style(f" {article.lang} ") - highlight(article.title, *unknown_chars(article.title)) + highlight(article.titre, *unknown_chars(article.titre)) print() # Break line # Print the context in which the unknown characters are found for text in unknown_chars_apparitions: @@ -197,7 +201,7 @@ if __name__ == "__main__": unknown_chars_articles: list[Article] = [] # Get sections with an eventual maximum - sections = Sections(max_sections_export) + sections = get_sections(max_sections_export) nb_sections_export: int = len(sections) # Loop among sections & export them @@ -205,11 +209,11 @@ if __name__ == "__main__": # Write the section & store its articles sectiondir = write_section(i, nb_sections_export, section) # Loop over section’s related files (images …) - documents = section.get_documents() + documents = get_documents(section.id_rubrique) for i, document in enumerate(documents): write_document(i, len(documents), document, sectiondir) # Loop over section’s articles - articles = section.get_articles(max_articles_export) + articles = get_articles(section.id_rubrique, (max_articles_export)) for i, article in enumerate(articles): articledir = write_article(i, len(articles), article, sectiondir) # Add article to unknown_chars_articles if needed @@ -218,7 +222,7 @@ if __name__ == "__main__": # Decrement export limit max_articles_export -= 1 # Loop over article’s related files (images …) - documents = section.get_documents() + documents = get_documents(article.id_article) for i, document in enumerate(documents): write_document(i, len(documents), document, sectiondir, 2) # Break line when finished exporting the section @@ -229,4 +233,4 @@ if __name__ == "__main__": for article in unknown_chars_articles: warn_unknown_chars(article) - db.close() # Close the connection with the database + DB.close() # Close the connection with the database diff --git a/spip2md/spipobjects.py b/spip2md/spipobjects.py new file mode 100644 index 0000000..d6a8805 --- /dev/null +++ b/spip2md/spipobjects.py @@ -0,0 +1,212 @@ +from os.path import basename, splitext + +from peewee import ModelSelect +from slugify import slugify +from yaml import dump + +from converters import convert +from database import ( + SpipArticles, + SpipAuteurs, + SpipAuteursLiens, + SpipDocuments, + SpipDocumentsLiens, + SpipRubriques, +) + +EXPORTTYPE: str = "md" + +# Convert images & files links +# text: str = convert_documents( +# self.texte, +# [(d.id, d.titre, d.slug()) for d in self.documents()], +# ) + + +class Document(SpipDocuments): + class Meta: + table_name: str = "spip_documents" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.titre: str = convert(self.titre, True) + self.descriptif: str = convert(self.descriptif, True) + self.statut: str = "false" if self.statut == "publie" else "true" + + def slug(self, date: bool = False) -> str: + name_type: tuple[str, str] = splitext(basename(self.fichier)) + return ( + slugify((self.date_publication + "-" if date else "") + name_type[0]) + + name_type[1] + ) + + +class Article(SpipArticles): + class Meta: + table_name: str = "spip_articles" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.titre: str = convert(self.titre, True) + self.descriptif: str = convert(self.descriptif, True) + self.texte: str = convert(self.texte) # Convert SPIP to Markdown + self.statut: str = "false" if self.statut == "publie" else "true" + self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true" + self.extra: str = convert(self.extra) # Probably unused + # Article specific + self.surtitle: str = convert(self.surtitre, True) # Probably unused + self.subtitle: str = convert(self.soustitre, True) # Probably unused + self.caption: str = convert(self.chapo) # Probably unused + self.ps: str = convert(self.ps) # Probably unused + self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false" + + def slug(self, date: bool = False) -> str: + return slugify((self.date + "-" if date else "") + self.titre) + + def filename(self) -> str: + return "index" + "." + self.lang + "." + EXPORTTYPE + + def frontmatter(self) -> str: + return dump( + { + "lang": self.lang, + "translationKey": self.id_trad, + "title": self.titre, + "publishDate": self.date, + "lastmod": self.maj, + "draft": self.statut, + "description": self.descriptif, + # Debugging + "spip_id": self.id_article, + "spip_id_secteur": self.id_secteur, + # Article specific + "surtitle": self.surtitle, + "subtitle": self.subtitle, + "date": self.date_redac, + "authors": [author.nom for author in self.authors()], + # Debugging + "spip_id_rubrique": self.id_rubrique, + "spip_chapo": self.caption, + }, + allow_unicode=True, + ) + + def body(self) -> str: + body: str = "" + # Add the title as a Markdown h1 + if len(self.titre) > 0: + body += "\n\n# " + self.titre + # If there is a text, add the text preceded by two line breaks + if len(self.texte) > 0: + # Remove remaining HTML after & append to body + body += "\n\n" + # Same with an "extra" section + if len(self.extra) > 0: + body += "\n\n# EXTRA\n\n" + self.extra + # If there is a caption, add the caption followed by a hr + if hasattr(self, "caption") and len(self.caption) > 0: + body += "\n\n" + self.caption + "\n\n***" + # PS + if hasattr(self, "ps") and len(self.ps) > 0: + body += "\n\n# POST-SCRIPTUM\n\n" + self.ps + # Microblog + if hasattr(self, "microblog") and len(self.microblog) > 0: + body += "\n\n# MICROBLOGGING\n\n" + self.microblog + return body + + def content(self) -> str: + # Return the final article text + return "---\n" + self.frontmatter() + "---" + self.body() + + def authors(self) -> list[SpipAuteurs]: + return ( + SpipAuteurs.select() + .join( + SpipAuteursLiens, + on=(SpipAuteurs.id_auteur == SpipAuteursLiens.id_auteur), + ) + .where(SpipAuteursLiens.id_objet == self.id_article) + ) + + +class Rubrique(SpipRubriques): + class Meta: + table_name: str = "spip_rubriques" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.titre: str = convert(self.titre, True) + self.descriptif: str = convert(self.descriptif, True) + self.texte: str = convert(self.texte) # Convert SPIP to Markdown + self.statut: str = "false" if self.statut == "publie" else "true" + self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true" + self.extra: str = convert(self.extra) # Probably unused + + def slug(self, date: bool = False) -> str: + return slugify((self.date + "-" if date else "") + self.titre) + + def filename(self) -> str: + return "index" + "." + self.lang + "." + EXPORTTYPE + + def frontmatter(self) -> str: + return dump( + { + "lang": self.lang, + "translationKey": self.id_trad, + "title": self.titre, + "publishDate": self.date, + "lastmod": self.maj, + "draft": self.statut, + "description": self.descriptif, + # Debugging + "spip_id": self.id_rubrique, + "spip_id_secteur": self.id_secteur, + }, + allow_unicode=True, + ) + + def body(self) -> str: + body: str = "" + # Add the title as a Markdown h1 + if len(self.titre) > 0: + body += "\n\n# " + self.titre + # If there is a text, add the text preceded by two line breaks + if len(self.texte) > 0: + # Remove remaining HTML after & append to body + body += "\n\n" + # Same with an "extra" section + if len(self.extra) > 0: + body += "\n\n# EXTRA\n\n" + self.extra + return body + + def content(self) -> str: + # Return the final article text + return "---\n" + self.frontmatter() + "---" + self.body() + + +# Query the DB to retrieve all sections sorted by publication date +def get_sections(limit: int = 10**6) -> ModelSelect: + return Rubrique.select().order_by(Rubrique.date.desc()).limit(limit) + + +# Query the DB to retrieve all articles sorted by publication date +def get_articles(section_id: int, limit: int = 10**6) -> ModelSelect: + return ( + Article.select() + .where(Article.id_rubrique == section_id) + .order_by(Article.date.desc()) + .limit(limit) + ) + + +# Query the DB to retrieve all documents related to object of id object_id +def get_documents(object_id: int, limit: int = 10**6) -> ModelSelect: + return ( + Document.select() + .join( + SpipDocumentsLiens, + on=(Document.id_document == SpipDocumentsLiens.id_document), + ) + .where(SpipDocumentsLiens.id_objet == object_id) + .limit(limit) + )