refactor to use Peewee objects extension in place of redefining every SPIP attribute

Guilhem Fauré 2023-05-24 10:43:39 +02:00
parent 4d269357de
commit 13fa720562
5 changed files with 298 additions and 383 deletions
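The pattern this commit applies, as a minimal before & after sketch (field lists shortened, everything else condensed from the diff below):

# Before: a hand-written wrapper class that redefines every SPIP attribute
class Article:
    def __init__(self, article: SpipArticles):
        self.title: str = convert_meta(article.titre)
        self.text: str = convert_body(article.texte)
        # ... one assignment per database column ...

# After: extend the generated Peewee model & convert its fields in place
class Article(SpipArticles):
    class Meta:
        table_name: str = "spip_articles"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.titre: str = convert(self.titre, True)
        self.texte: str = convert(self.texte)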

View File

@@ -3,7 +3,7 @@ from re import I, S, compile, finditer, sub
from typing import Optional
# SPIP syntax to Markdown
spip_to_markdown = (
SPIP_TO_MARKDOWN = (
( # horizontal rule
compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", S | I),
# r"---",
@@ -40,6 +40,14 @@ spip_to_markdown = (
),
r"~\1~",
),
( # images
compile(r"<(img|image)([0-9]+)(\|.*?)*>", S | I),
r"![](\1\2)",
),
( # documents & embeds
compile(r"<(doc|document|emb)([0-9]+)(\|.*?)*>", S | I),
r"[](\1\2)",
),
( # anchor
compile(r"\[ *(.*?) *-> *(.*?) *\]", S | I),
r"[\1](\2)",
@@ -100,58 +108,20 @@ spip_to_markdown = (
),
r"\1",
),
)
spip_to_text = (
( # strong
compile(r"\{\{ *(.*?) *\}\}", S | I),
r"\1",
),
( # html strong
compile(r"<strong> *(.*?) *</strong>", S | I),
r"\1",
),
( # emphasis
compile(r"\{ *(.*?) *\}", S | I),
r"\1",
),
( # html emphasis
compile(r"<i> *(.*?) *<\/i>", S | I),
r"\1",
),
( # strikethrough
compile(
r"<del>\s*(.*?)\s*(?:(\r?\n){2,}|<\/del>)",
S | I,
),
r"\1",
),
( # Keep only the first language in multi-language blocks
compile(
r"<multi>\s*(?:\[.{2,4}\])?\s*(.*?)\s*(?:\s*\[.{2,4}\].*)*<\/multi>",
S | I,
),
r"\1",
),
( # remove every html tag
compile(r"<\/?.*?> *", S | I),
r"",
),
( # Remove beginning with angle bracket(s)
compile(r"^>+ +", S | I),
r"",
),
( # Remove beginning with a number followed by a dot
compile(r"^\d+\. +", S | I),
( # WARNING remove every html tag
compile(r"<\/?.*?>\s*", S | I),
r"",
),
)
# HTML tag regex; WARNING: matches any tag & can be used to strip them all
html_tag = compile(r"<\/?.*?> *", S | I)
# Further cleaning for metadata texts such as titles or descriptions
SPIP_META_BLOAT = (
compile(r"^>+ +", S | I), # Remove beginning with angle bracket(s)
compile(r"^\d+\. +", S | I), # Remove beginning with a number followed by a dot
)
# Broken ISO encoding to proper UTF-8
iso_to_utf = (
ISO_TO_UTF = (
( # Fix UTF-8 apostrophe that was interpreted as ISO 8859-1
"’",
r"",
@@ -264,82 +234,71 @@ iso_to_utf = (
)
# WARNING unknown broken encoding
unknown_iso = (
UNKNOWN_ISO = (
r"
",
r"∆",
r"û",
)
# Apply spip_to_markdown conversions to a text
def convert_body(text: Optional[str]) -> str:
# Apply SPIP to Markdown & ISO to UTF conversions to a text, & optionally clean metadata
def convert(text: Optional[str], clean_meta: bool = False) -> str:
if text is None:
return ""
for spip, markdown in spip_to_markdown:
for spip, markdown in SPIP_TO_MARKDOWN:
text = spip.sub(markdown, text)
for iso, utf in iso_to_utf:
if clean_meta:
for bloat in SPIP_META_BLOAT:
text = bloat.sub("", text)
for iso, utf in ISO_TO_UTF:
text = text.replace(iso, utf)
return text
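A sketch of what convert() produces with the rules visible above (illustrative input; exact output depends on the full rule table):

from converters import convert

# Anchor & image rules from SPIP_TO_MARKDOWN
convert("[SPIP->https://www.spip.net] <img12|center>")
# -> "[SPIP](https://www.spip.net) ![](img12)"

# clean_meta=True additionally strips the SPIP_META_BLOAT prefixes
convert("1. Some title", clean_meta=True)  # -> "Some title"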
# Apply spip_to_text conversions to a text
def convert_meta(text: Optional[str]) -> str:
if text is None:
return ""
for spip, metadata in spip_to_text:
text = spip.sub(metadata, text)
for iso, utf in iso_to_utf:
text = text.replace(iso, utf)
return text
# Replace images & documents in SPIP text with Markdown links with human-readable names
def convert_documents(text: str, documents: list[tuple[int, str, str]]) -> str:
# Replace images & files links in Markdown with real slugs of the actually linked files
def link_documents(text: str, documents: list[tuple[int, str, str]]) -> str:
for id, name, slug in documents:
# Replace images that don't have a title written in text
text = sub(
r"<(?:img|image)" + str(id) + r"(\|.*?)*>",
r"\[]\((?:img|image)" + str(id) + r"(\|.*?)*\)",
f"![{name}]({slug})",
text,
)
# Replace documents that don't have a title written in text
text = sub(
r"<(?:doc|emb)" + str(id) + r"(\|.*?)*>",
r"\[]\((?:doc|document|emb)" + str(id) + r"(\|.*?)*\)",
f"[{name}]({slug})",
text,
)
# Replace images that already had a title in Markdown style link
text = sub(
r"\[(.*?)\]\((?:doc|emb)" + str(id) + r"(\|.*?)*\)",
r"\[(.+?)\]\((?:img|image)" + str(id) + r"(\|.*?)*\)",
f"![\\1]({slug})",
text,
)
# Replace documents that already had a title in Markdown style link
text = sub(
r"\[(.+?)\]\((?:doc|document|emb)" + str(id) + r"(\|.*?)*\)",
f"[\\1]({slug})",
text,
)
return text
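An illustrative call with made-up document metadata, showing how the placeholder links left by convert() are resolved to real names & slugs:

docs = [(12, "price-list", "price-list.pdf")]  # (id, name, slug), illustrative
link_documents("[](doc12) & [Tarifs](doc12)", docs)
# -> "[price-list](price-list.pdf) & [Tarifs](price-list.pdf)"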
# Replace unknown chars with empty strings (delete them)
def remove_unknown_chars(text: str) -> str:
for char in unknown_iso:
text.replace(char, "")
return text
# Replace HTML tags chars with empty strings (delete them)
def remove_tags(text: str) -> str:
return html_tag.sub("", text)
# Return a list of tuples giving the start & end of each unknown substring in text
def unknown_chars(text: str) -> list[tuple[int, int]]:
positions: list[tuple[int, int]] = []
for char in unknown_iso:
for char in UNKNOWN_ISO:
for match in finditer("(" + char + ")+", text):
positions.append((match.start(), match.end()))
return positions
# Return strings with the unknown chars found in text, surrounded by context_length chars of context
def get_unknown_chars(text: str, context_length: int = 20) -> list[str]:
def unknown_chars_context(text: str, context_length: int = 20) -> list[str]:
errors: list[str] = []
context: str = r".{0," + str(context_length) + r"}"
for char in unknown_iso:
for char in UNKNOWN_ISO:
matches = finditer(
context + r"(?=" + char + r")" + char + r".*?(?=\r?\n|$)",
text,

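unknown_chars() feeds the highlighting done by the main script; for instance, with "∆" from the UNKNOWN_ISO tuple above:

unknown_chars("math ∆∆ sign")  # -> [(5, 7)], the span of the broken run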
View File

@@ -1,4 +1,3 @@
# pyright: basic
# type: ignore
from peewee import (
SQL,
@@ -15,7 +14,7 @@ from peewee import (
TextField,
)
db = MySQLDatabase(None)
DB = MySQLDatabase(None)
# class UnknownField(object):
@@ -25,7 +24,7 @@ db = MySQLDatabase(None)
class BaseModel(Model):
class Meta:
database: MySQLDatabase = db
database: MySQLDatabase = DB
class SpipArticles(BaseModel):

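MySQLDatabase(None) is Peewee's deferred-initialization form: the models bind to DB at import time & the credentials are only filled in at run time. A minimal sketch (credentials illustrative, the real ones come from config):

from database import DB, SpipArticles

DB.init("spip", host="localhost", user="spip", password="secret")
DB.connect()
print(SpipArticles.select().count())  # models are usable once DB is initialized
DB.close()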
View File

@@ -1,259 +0,0 @@
# pyright: strict
from os.path import basename, splitext
from typing import Any, Optional
from slugify import slugify
from yaml import dump
from converter import convert_body, convert_documents, convert_meta, remove_tags
from database import (
SpipArticles,
SpipAuteurs,
SpipAuteursLiens,
SpipDocuments,
SpipDocumentsLiens,
SpipRubriques,
)
EXPORTTYPE: str = "md"
class Iterator:
items: list[Any]
def __init__(self) -> None:
# Set the limit at the number of retrieved items
self.LIMIT: int = len(self.items)
# Start before the first element
self.count: int = -1
def __iter__(self):
return self
def __len__(self) -> int:
return self.LIMIT
def remaining(self) -> int:
return self.LIMIT - self.count
def __next__(self) -> Any:
self.count += 1
if self.remaining() <= 0:
raise StopIteration
return self.items[self.count]
class Document:
def __init__(self, document: SpipDocuments) -> None:
self.id: int = document.id_document
self.thumbnail_id: int = document.id_vignette
self.title: str = convert_meta(document.titre)
self.date: str = document.date
self.description: str = convert_meta(document.descriptif)
self.file: str = document.fichier
self.draft: bool = document.statut == "publie"
self.creation: str = document.date
self.publication: str = document.date_publication
self.update: str = document.maj
self.media: str = document.media
def get_slug(self, date: bool = False) -> str:
name_type = splitext(basename(self.file))
return (
slugify((self.publication + "-" if date else "") + name_type[0])
+ name_type[1]
)
class Documents(Iterator):
def __init__(self, object_id: int) -> None:
# Query the DB to retrieve all documents related to object of id object_id
items = (
SpipDocuments.select()
.join(
SpipDocumentsLiens,
on=(SpipDocuments.id_document == SpipDocumentsLiens.id_document),
)
.where(SpipDocumentsLiens.id_objet == object_id)
)
self.items: list[Document] = [Document(i) for i in items]
super().__init__()
class Item:
id: int
def __init__(self, item: SpipArticles | SpipRubriques):
self.title: str = convert_meta(item.titre)
self.section_id: int = item.id_rubrique
self.description: str = convert_meta(item.descriptif)
self.text: str = convert_body(item.texte) # Convert SPIP to Markdown
self.publication: str = item.date
self.draft: bool = item.statut == "publie"
self.sector_id: int = item.id_secteur
self.update: str = item.maj
self.lang: str = item.lang
self.set_lang: bool = item.langue_choisie == "oui" # TODO Why?
self.translation_key: int = item.id_trad
self.extra: str = convert_body(item.extra) # Probably unused
def get_slug(self, date: bool = False) -> str:
return slugify((self.publication + "-" if date else "") + self.title)
def get_filename(self) -> str:
return "index" + "." + self.lang + "." + EXPORTTYPE
def get_frontmatter(self, append: Optional[dict[str, Any]] = None) -> str:
return dump(
{
"lang": self.lang,
"translationKey": self.translation_key,
"title": self.title,
"publishDate": self.publication,
"lastmod": self.update,
"draft": self.draft,
"description": self.description,
# Debugging
"spip_id": self.id,
"spip_id_secteur": self.sector_id,
}
| append
if append is not None
else {},
allow_unicode=True,
)
def get_body(self) -> str:
body: str = ""
# Add the title as a Markdown h1
if len(self.title) > 0:
body += "\n\n# " + self.title
# If there is a text, add the text preceded by two line breaks
if len(self.text) > 0:
# Convert images & files links
text: str = convert_documents(
self.text,
[(d.id, d.title, d.get_slug()) for d in self.get_documents()],
)
# Remove remaining HTML after & append to body
body += "\n\n" + remove_tags(text)
# Same with an "extra" section
if len(self.extra) > 0:
body += "\n\n# EXTRA\n\n" + self.extra
return body
def get_content(self) -> str:
# Return the final article text
return "---\n" + self.get_frontmatter() + "---" + self.get_body()
def get_documents(self) -> Documents:
return Documents(self.id)
class Article(Item):
def __init__(self, article: SpipArticles):
super().__init__(article)
self.id: int = article.id_article
self.surtitle: str = convert_meta(article.surtitre) # Probably unused
self.subtitle: str = convert_meta(article.soustitre) # Probably unused
self.caption: str = convert_body(article.chapo) # Probably unused
self.ps: str = convert_body(article.ps) # Probably unused
self.update_2: str = article.date_modif # Probably unused duplicate of maj
self.creation: str = article.date_redac
self.forum: bool = article.accepter_forum == "oui" # TODO Why?
self.sitename: str = article.nom_site # Probably useless
self.virtual: str = article.virtuel # TODO Why?
self.microblog: str = article.microblog # Probably unused
# self.export = article.export # USELESS
# self.views: int = article.visites # USELESS in static
# self.referers: int = article.referers # USELESS in static
# self.popularity: float = article.popularite # USELESS in static
# self.version = article.id_version # USELESS
def get_authors(self) -> list[SpipAuteurs]:
return (
SpipAuteurs.select()
.join(
SpipAuteursLiens,
on=(SpipAuteurs.id_auteur == SpipAuteursLiens.id_auteur),
)
.where(SpipAuteursLiens.id_objet == self.id)
)
def get_frontmatter(self, append: Optional[dict[str, Any]] = None) -> str:
return super().get_frontmatter(
{
"surtitle": self.surtitle,
"subtitle": self.subtitle,
"date": self.creation,
"authors": [author.nom for author in self.get_authors()],
# Debugging
"spip_id_rubrique": self.section_id,
"spip_id_secteur": self.sector_id,
"spip_chapo": self.caption,
}
| append
if append is not None
else {},
)
def get_body(self) -> str:
body: str = super().get_body()
# If there is a caption, add the caption followed by a hr
if hasattr(self, "caption") and len(self.caption) > 0:
body += "\n\n" + self.caption + "\n\n***"
# PS
if hasattr(self, "ps") and len(self.ps) > 0:
body += "\n\n# POST-SCRIPTUM\n\n" + self.ps
# Microblog
if hasattr(self, "microblog") and len(self.microblog) > 0:
body += "\n\n# MICROBLOGGING\n\n" + self.microblog
return body
class Section(Item):
def __init__(self, section: SpipRubriques):
super().__init__(section)
self.id: int = section.id_rubrique
self.parent_id: int = section.id_parent
self.depth: int = section.profondeur
self.agenda: int = section.agenda
def get_filename(self) -> str:
return "_" + super().get_filename()
def get_articles(self, limit: int = 0):
return Articles(self.id, limit)
class Articles(Iterator):
def __init__(self, section_id: int, limit: int = 0):
# Query the DB to retrieve all articles sorted by publication date
if limit > 0:
items = (
SpipArticles.select()
.where(SpipArticles.id_rubrique == section_id)
.order_by(SpipArticles.date.desc())
.limit(limit)
)
else:
items = (
SpipArticles.select()
.where(SpipArticles.id_rubrique == section_id)
.order_by(SpipArticles.date.desc())
)
self.items: list[Article] = [Article(i) for i in items]
super().__init__()
class Sections(Iterator):
def __init__(self, limit: int = 0):
# Query the DB to retrieve all sections sorted by publication date
if limit > 0:
items = (
SpipRubriques.select().order_by(SpipRubriques.date.desc()).limit(limit)
)
else:
items = SpipRubriques.select().order_by(SpipRubriques.date.desc())
self.items: list[Section] = [Section(i) for i in items]
super().__init__()

View File

@@ -1,18 +1,19 @@
#!python
# pyright: strict
from os import makedirs
from os.path import expanduser
from shutil import copyfile, rmtree
from sys import argv
from config import config
from converter import get_unknown_chars, unknown_chars
from database import db
from items import (
from converters import unknown_chars, unknown_chars_context
from database import DB
from spipobjects import (
Article,
Document,
Section,
Sections,
Rubrique,
get_articles,
get_documents,
get_sections,
)
@@ -64,26 +65,27 @@ def indent(nb: int = 1) -> None:
# Connect to the MySQL database with Peewee ORM
db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
db.connect()
DB.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
DB.connect()
# Output information about ongoing export & write section to output destination
def write_section(index: int, total: int, section: Section) -> str:
def write_section(index: int, total: int, section: Rubrique) -> str:
color = G # Associate sections to green
# Print the name of the exported section & number of remaining sections
style(f"{index + 1}. ", BO)
highlight(section.title, *unknown_chars(section.title))
style(f" {total-index-1}", BO, G)
highlight(section.titre, *unknown_chars(section.titre))
style(f" {total-index-1}", BO, color)
style(f" section{s(total-index)} left")
# Define the sections path (directory) & create directory(ies) if needed
sectiondir: str = config.output_dir + "/" + section.get_slug()
sectiondir: str = config.output_dir + "/" + section.slug()
makedirs(sectiondir, exist_ok=True)
# Define the section filename & write the index at that filename
sectionpath: str = sectiondir + "/" + section.get_filename()
sectionpath: str = sectiondir + "/" + section.filename()
with open(sectionpath, "w") as f:
f.write(section.get_content())
f.write(section.content())
# Print export location when finished exporting
style(" -> ", BO, G)
style(" -> ", BO, color)
print(sectionpath)
# Return the path of the section's directory
return sectiondir
@@ -91,30 +93,31 @@ def write_section(index: int, total: int, section: Section) -> str:
# Output information about ongoing export & write article to output destination
def write_article(index: int, total: int, article: Article, sectiondir: str) -> str:
color = Y # Associate articles to yellow
# Print the remaining number of articles to export every 100 articles
if index % 100 == 0:
indent()
print("Exporting", end="")
style(f" {total-index}", BO, Y)
style(f" {total-index}", BO, color)
print(" SPIP", end="")
style(f" article{s(total-index)}")
print(" to Markdown & YAML files")
# Print the title of the article being exported
style(
f" {index + 1}. "
+ ("EMPTY " if len(article.text) < 1 else "")
+ ("EMPTY " if len(article.texte) < 1 else "")
+ f"{article.lang} "
)
highlight(article.title, *unknown_chars(article.title))
highlight(article.titre, *unknown_chars(article.titre))
# Define the full article path & create directory(ies) if needed
articledir: str = sectiondir + "/" + article.get_slug()
articledir: str = sectiondir + "/" + article.slug()
makedirs(articledir, exist_ok=True)
# Define the article filename & write the article at the filename
articlepath: str = articledir + "/" + article.get_filename()
articlepath: str = articledir + "/" + article.filename()
with open(articlepath, "w") as f:
f.write(article.get_content())
f.write(article.content())
# Print export location when finished exporting
style(" -> ", BO, B)
style(" -> ", BO, color)
print(articlepath)
return articledir
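For one section holding one article, the two functions above produce a tree along these lines (slugs & lang illustrative; per the code, documents are copied into the section directory):

# output_dir/
# └── ma-rubrique/              <- write_section()
#     ├── index.fr.md           <- section.content()
#     ├── price-list.pdf        <- write_document()
#     └── mon-article/          <- write_article()
#         └── index.fr.md       <- article.content()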
@@ -123,34 +126,35 @@ def write_article(index: int, total: int, article: Article, sectiondir: str) ->
def write_document(
index: int, total: int, document: Document, objectdir: str, indent_depth: int = 1
) -> None:
color = B # Associate documents to blue
if index % 100 == 0:
indent(indent_depth)
print("Exporting", end="")
style(f" {total-index}", BO, B)
style(f" {total-index}", BO, color)
style(f" document{s(total-index)}\n")
# Print the name of the file with a counter
indent(indent_depth)
style(f"{index + 1}. {document.media} ")
if len(document.title) > 0:
highlight(document.title + " ", *unknown_chars(document.title))
if len(document.titre) > 0:
highlight(document.titre + " ", *unknown_chars(document.titre))
style("at ")
print(document.file, end="")
print(document.fichier, end="")
# Define document path
documentpath: str = expanduser(config.data_dir + "/" + document.file)
documentpath: str = expanduser(config.data_dir + "/" + document.fichier)
# Copy the document from its SPIP location to the new location
try:
copyfile(documentpath, objectdir + "/" + document.get_slug())
copyfile(documentpath, objectdir + "/" + document.slug())
except FileNotFoundError:
style(" -> NOT FOUND!\n", BO, R)
else:
# Print the output file's path once the file is copied
style(" ->", BO, B)
print(f" {objectdir}/{document.get_slug()}")
style(" ->", BO, color)
print(f" {objectdir}/{document.slug()}")
# Return true if the article text contains an unknown character
def has_unknown_chars(article: Article) -> bool:
if len(get_unknown_chars(article.text)) > 0:
if len(unknown_chars_context(article.texte)) > 0:
return True
return False
@@ -159,13 +163,13 @@ def has_unknown_chars(article: Article) -> bool:
def warn_unknown_chars(article: Article) -> None:
# Print the title of the article in which there is unknown characters
# & the number of them
unknown_chars_apparitions: list[str] = get_unknown_chars(article.text)
unknown_chars_apparitions: list[str] = unknown_chars_context(article.texte)
nb: int = len(unknown_chars_apparitions)
s: str = "s" if nb > 1 else ""
style(f"{nb}")
print(f" unknown character{s} in", end="")
style(f" {article.lang} ")
highlight(article.title, *unknown_chars(article.title))
highlight(article.titre, *unknown_chars(article.titre))
print() # Break line
# Print the context in which the unknown characters are found
for text in unknown_chars_apparitions:
@@ -197,7 +201,7 @@ if __name__ == "__main__":
unknown_chars_articles: list[Article] = []
# Get sections, up to an optional maximum
sections = Sections(max_sections_export)
sections = get_sections(max_sections_export)
nb_sections_export: int = len(sections)
# Loop among sections & export them
@@ -205,11 +209,11 @@
# Write the section & store its articles
sectiondir = write_section(i, nb_sections_export, section)
# Loop over the section's related files (images…)
documents = section.get_documents()
documents = get_documents(section.id_rubrique)
for i, document in enumerate(documents):
write_document(i, len(documents), document, sectiondir)
# Loop over sections articles
articles = section.get_articles(max_articles_export)
articles = get_articles(section.id_rubrique, (max_articles_export))
for i, article in enumerate(articles):
articledir = write_article(i, len(articles), article, sectiondir)
# Add article to unknown_chars_articles if needed
@@ -218,7 +222,7 @@
# Decrement export limit
max_articles_export -= 1
# Loop over the article's related files (images…)
documents = section.get_documents()
documents = get_documents(article.id_article)
for i, document in enumerate(documents):
write_document(i, len(documents), document, sectiondir, 2)
# Break line when finished exporting the section
@@ -229,4 +233,4 @@
for article in unknown_chars_articles:
warn_unknown_chars(article)
db.close() # Close the connection with the database
DB.close() # Close the connection with the database

spip2md/spipobjects.py (new file, 212 lines)
View File

@@ -0,0 +1,212 @@
from os.path import basename, splitext
from peewee import ModelSelect
from slugify import slugify
from yaml import dump
from converters import convert
from database import (
SpipArticles,
SpipAuteurs,
SpipAuteursLiens,
SpipDocuments,
SpipDocumentsLiens,
SpipRubriques,
)
EXPORTTYPE: str = "md"
# Convert images & files links
# text: str = convert_documents(
# self.texte,
# [(d.id, d.titre, d.slug()) for d in self.documents()],
# )
class Document(SpipDocuments):
class Meta:
table_name: str = "spip_documents"
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.titre: str = convert(self.titre, True)
self.descriptif: str = convert(self.descriptif, True)
self.statut: str = "false" if self.statut == "publie" else "true"
def slug(self, date: bool = False) -> str:
name_type: tuple[str, str] = splitext(basename(self.fichier))
return (
slugify((self.date_publication + "-" if date else "") + name_type[0])
+ name_type[1]
)
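slug() slugifies only the basename & keeps the extension intact, so a dated variant stays a valid filename. Roughly (row values illustrative):

doc = Document(fichier="IMG/pdf/Price List.pdf", date_publication="2023-05-24")
doc.slug()      # -> "price-list.pdf"
doc.slug(True)  # -> "2023-05-24-price-list.pdf"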
class Article(SpipArticles):
class Meta:
table_name: str = "spip_articles"
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.titre: str = convert(self.titre, True)
self.descriptif: str = convert(self.descriptif, True)
self.texte: str = convert(self.texte) # Convert SPIP to Markdown
self.statut: str = "false" if self.statut == "publie" else "true"
self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true"
self.extra: str = convert(self.extra) # Probably unused
# Article specific
self.surtitle: str = convert(self.surtitre, True) # Probably unused
self.subtitle: str = convert(self.soustitre, True) # Probably unused
self.caption: str = convert(self.chapo) # Probably unused
self.ps: str = convert(self.ps) # Probably unused
self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false"
def slug(self, date: bool = False) -> str:
return slugify((self.date + "-" if date else "") + self.titre)
def filename(self) -> str:
return "index" + "." + self.lang + "." + EXPORTTYPE
def frontmatter(self) -> str:
return dump(
{
"lang": self.lang,
"translationKey": self.id_trad,
"title": self.titre,
"publishDate": self.date,
"lastmod": self.maj,
"draft": self.statut,
"description": self.descriptif,
# Debugging
"spip_id": self.id_article,
"spip_id_secteur": self.id_secteur,
# Article specific
"surtitle": self.surtitle,
"subtitle": self.subtitle,
"date": self.date_redac,
"authors": [author.nom for author in self.authors()],
# Debugging
"spip_id_rubrique": self.id_rubrique,
"spip_chapo": self.caption,
},
allow_unicode=True,
)
def body(self) -> str:
body: str = ""
# Add the title as a Markdown h1
if len(self.titre) > 0:
body += "\n\n# " + self.titre
# If there is a text, add the text preceded by two line breaks
if len(self.texte) > 0:
# Remove remaining HTML after & append to body
body += "\n\n"
# Same with an "extra" section
if len(self.extra) > 0:
body += "\n\n# EXTRA\n\n" + self.extra
# If there is a caption, add the caption followed by a hr
if hasattr(self, "caption") and len(self.caption) > 0:
body += "\n\n" + self.caption + "\n\n***"
# PS
if hasattr(self, "ps") and len(self.ps) > 0:
body += "\n\n# POST-SCRIPTUM\n\n" + self.ps
# Microblog
if hasattr(self, "microblog") and len(self.microblog) > 0:
body += "\n\n# MICROBLOGGING\n\n" + self.microblog
return body
def content(self) -> str:
# Return the final article text
return "---\n" + self.frontmatter() + "---" + self.body()
def authors(self) -> list[SpipAuteurs]:
return (
SpipAuteurs.select()
.join(
SpipAuteursLiens,
on=(SpipAuteurs.id_auteur == SpipAuteursLiens.id_auteur),
)
.where(SpipAuteursLiens.id_objet == self.id_article)
)
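content() thus assembles a Hugo-style page: YAML frontmatter between "---" fences, then the Markdown body. Schematically (values illustrative):

for article in get_articles(1, limit=1):  # section id illustrative
    print(article.content())
# ---
# date: '2023-05-24'
# draft: 'false'
# lang: fr
# title: Exemple
# ...
# ---
#
# # Exemple
# ...

Note that draft ends up as the string 'false' rather than a YAML boolean, since statut was already stringified in __init__.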
class Rubrique(SpipRubriques):
class Meta:
table_name: str = "spip_rubriques"
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.titre: str = convert(self.titre, True)
self.descriptif: str = convert(self.descriptif, True)
self.texte: str = convert(self.texte) # Convert SPIP to Markdown
self.statut: str = "false" if self.statut == "publie" else "true"
self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true"
self.extra: str = convert(self.extra) # Probably unused
def slug(self, date: bool = False) -> str:
return slugify((self.date + "-" if date else "") + self.titre)
def filename(self) -> str:
return "index" + "." + self.lang + "." + EXPORTTYPE
def frontmatter(self) -> str:
return dump(
{
"lang": self.lang,
"translationKey": self.id_trad,
"title": self.titre,
"publishDate": self.date,
"lastmod": self.maj,
"draft": self.statut,
"description": self.descriptif,
# Debugging
"spip_id": self.id_rubrique,
"spip_id_secteur": self.id_secteur,
},
allow_unicode=True,
)
def body(self) -> str:
body: str = ""
# Add the title as a Markdown h1
if len(self.titre) > 0:
body += "\n\n# " + self.titre
# If there is a text, add the text preceded by two line breaks
if len(self.texte) > 0:
# Remove remaining HTML after & append to body
body += "\n\n"
# Same with an "extra" section
if len(self.extra) > 0:
body += "\n\n# EXTRA\n\n" + self.extra
return body
def content(self) -> str:
# Return the final section text
return "---\n" + self.frontmatter() + "---" + self.body()
# Query the DB to retrieve all sections sorted by publication date
def get_sections(limit: int = 10**6) -> ModelSelect:
return Rubrique.select().order_by(Rubrique.date.desc()).limit(limit)
# Query the DB to retrieve all articles sorted by publication date
def get_articles(section_id: int, limit: int = 10**6) -> ModelSelect:
return (
Article.select()
.where(Article.id_rubrique == section_id)
.order_by(Article.date.desc())
.limit(limit)
)
# Query the DB to retrieve all documents related to object of id object_id
def get_documents(object_id: int, limit: int = 10**6) -> ModelSelect:
return (
Document.select()
.join(
SpipDocumentsLiens,
on=(Document.id_document == SpipDocumentsLiens.id_document),
)
.where(SpipDocumentsLiens.id_objet == object_id)
.limit(limit)
)
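Taken together, the module reads as a small query API; the export loop of the main script then boils down to (limit illustrative):

for section in get_sections(limit=10):
    print(section.titre, section.slug())
    for article in get_articles(section.id_rubrique):
        print(" ", article.titre, article.filename())
        for document in get_documents(article.id_article):
            print("   ", document.fichier, "->", document.slug())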