links documents and images

This commit is contained in:
Guilhem Fauré 2023-05-24 13:37:59 +02:00
parent a71302c5ed
commit 27938f92b9
3 changed files with 75 additions and 63 deletions

View File

@ -231,6 +231,10 @@ ISO_TO_UTF = (
"†",
r"",
),
( # Remove Windows style line feed
"\r",
r"",
),
)
# WARNING unknown broken encoding
@ -256,32 +260,31 @@ def convert(text: Optional[str], clean_meta: bool = False) -> str:
# Replace images & files links in Markdown with real slugs of the actually linked files
def link_documents(text: str, documents: list[tuple[int, str, str]]) -> str:
# Rewrite, for every (id, name, slug) triple, the links in text that target that id
# NOTE(review): no return statement is visible in this span, confirm text is returned
for id, name, slug in documents:
# Replace images that don't have a title written in text
text = sub(
r"\[]\((?:img|image)" + str(id) + r"(\|.*?)*\)",
f"![{name}]({slug})",
text,
)
# Replace documents that don't have a title written in text
text = sub(
r"\[]\((?:doc|document|emb)" + str(id) + r"(\|.*?)*\)",
f"[{name}]({slug})",
text,
)
# Replace images that already had a title in Markdown style link
text = sub(
r"\[(.+?)\]\((?:img|image)" + str(id) + r"(\|.*?)*\)",
f"![\\1]({slug})",
text,
)
# Replace documents that already had a title in Markdown style link
text = sub(
r"\[(.+?)\]\((?:doc|document|emb)" + str(id) + r"(\|.*?)*\)",
f"[\\1]({slug})",
text,
)
def link_document(text: str, id: int, name: str, slug: str) -> str:
    """Point the links that reference one document at that document's slug.

    Four link shapes are rewritten, where ``N`` is ``id`` and an optional
    ``|option`` suffix after ``N`` is dropped:

    * ``![](imgN)`` / ``![](imageN)``             -> ``![name](slug)``
    * ``[](docN)`` / ``[](documentN)`` / ``[](embN)`` -> ``[name](slug)``
    * ``![title](imgN)`` / ``![title](imageN)``   -> ``![title](slug)``
    * ``[title](docN)`` / ...                     -> ``[title](slug)``

    Args:
        text: Markdown text that may contain such links.
        id: Numeric identifier of the document (the name shadows the builtin
            but is kept because it is part of the signature).
        name: Title used when the link has no title of its own.
        slug: Link target written in place of the identifier.

    Returns:
        The text with every matching link rewritten.
    """
    doc_id = str(id)
    # Shared tails of the patterns: "(" + keyword + id + optional "|options" + ")"
    image_target = r"\((?:img|image)" + doc_id + r"(\|.*?)*\)"
    file_target = r"\((?:doc|document|emb)" + doc_id + r"(\|.*?)*\)"

    # Replacements are callables rather than template strings: a template
    # would interpret backslashes found in `name` or `slug` (e.g. "\1" as a
    # group reference, "\d" as an invalid escape raising re.error).

    # Replace images that don't have a title written in text
    text = sub(r"!\[]" + image_target, lambda _: f"![{name}]({slug})", text)
    # Replace documents that don't have a title written in text
    text = sub(r"\[]" + file_target, lambda _: f"[{name}]({slug})", text)
    # Replace images that already had a title in Markdown style link
    text = sub(
        r"!\[(.+?)\]" + image_target,
        lambda match: f"![{match.group(1)}]({slug})",
        text,
    )
    # Replace documents that already had a title in Markdown style link
    text = sub(
        r"\[(.+?)\]" + file_target,
        lambda match: f"[{match.group(1)}]({slug})",
        text,
    )
    return text

View File

@ -12,7 +12,6 @@ from spipobjects import (
Document,
Rubrique,
get_articles,
get_documents,
get_sections,
)
@ -205,25 +204,28 @@ if __name__ == "__main__":
# Loop among sections & export them
for i, section in enumerate(sections):
# Write the section & store its articles
# Get sections documents & link them
documents = section.documents()
# Write the section and store its output directory
sectiondir = write_section(i, nb_sections_export, section)
# Loop over sections related files (images …)
documents = get_documents(section.id_rubrique)
# Loop over sections related documents (images …)
for i, document in enumerate(documents):
write_document(i, len(documents), document, sectiondir)
# Loop over sections articles
articles = get_articles(section.id_rubrique, (max_articles_export))
for i, article in enumerate(articles):
# Get articles documents & link them
documents = article.documents()
# Write the article and store its output directory
articledir = write_article(i, len(articles), article, sectiondir)
# Add article to unknown_chars_articles if needed
if has_unknown_chars(article):
unknown_chars_articles.append(article)
# Decrement export limit
max_articles_export -= 1
# Loop over articles related files (images …)
documents = get_documents(article.id_article)
# Loop over articles related documents (images …)
for i, document in enumerate(documents):
write_document(i, len(documents), document, sectiondir, 2)
write_document(i, len(documents), document, articledir, 2)
# Break line when finished exporting the section
print()

View File

@ -4,7 +4,7 @@ from peewee import ModelSelect
from slugify import slugify
from yaml import dump
from converters import convert
from converters import convert, link_document
from database import (
SpipArticles,
SpipAuteurs,
@ -16,12 +16,6 @@ from database import (
EXPORTTYPE: str = "md"
# Convert images & files links
# text: str = convert_documents(
# self.texte,
# [(d.id, d.titre, d.slug()) for d in self.documents()],
# )
class Document(SpipDocuments):
class Meta:
@ -60,6 +54,19 @@ class Article(SpipArticles):
self.ps: str = convert(self.ps) # Probably unused
self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false"
def documents(self) -> ModelSelect:
    """Select the documents linked to this article and link them in its text.

    Side effect: ``self.texte`` is passed through ``link_document`` once per
    selected document.

    Returns:
        The ``ModelSelect`` query over the linked documents.
    """
    # NOTE(review): the filter compares id_objet only; confirm that ids of
    # other object kinds cannot collide in the link table.
    same_document = Document.id_document == SpipDocumentsLiens.id_document
    linked = Document.select().join(SpipDocumentsLiens, on=same_document)
    linked = linked.where(SpipDocumentsLiens.id_objet == self.id_article)
    for doc in linked:
        self.texte = link_document(
            self.texte, doc.id_document, doc.titre, doc.slug()
        )
    return linked
def slug(self, date: bool = False) -> str:
return slugify((self.date + "-" if date else "") + self.titre)
@ -129,6 +136,16 @@ class Article(SpipArticles):
)
# Query the DB to retrieve all articles sorted by publication date
def get_articles(section_id: int, limit: int = 10**6) -> ModelSelect:
    """Build the query selecting the articles of one section, newest first.

    Args:
        section_id: Value compared against ``Article.id_rubrique``.
        limit: Maximum number of rows the query may return.

    Returns:
        The ``ModelSelect`` query, ordered by descending ``Article.date``.
    """
    in_section = Article.id_rubrique == section_id
    newest_first = Article.date.desc()
    query = Article.select().where(in_section)
    return query.order_by(newest_first).limit(limit)
class Rubrique(SpipRubriques):
class Meta:
table_name: str = "spip_rubriques"
@ -142,6 +159,19 @@ class Rubrique(SpipRubriques):
self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true"
self.extra: str = convert(self.extra) # Probably unused
def documents(self) -> ModelSelect:
    """Select the documents linked to this section and link them in its text.

    Side effect: ``self.texte`` is passed through ``link_document`` once per
    selected document.

    Returns:
        The ``ModelSelect`` query over the linked documents.
    """
    # NOTE(review): the filter compares id_objet only; confirm that ids of
    # other object kinds cannot collide in the link table.
    same_document = Document.id_document == SpipDocumentsLiens.id_document
    linked = Document.select().join(SpipDocumentsLiens, on=same_document)
    linked = linked.where(SpipDocumentsLiens.id_objet == self.id_rubrique)
    for doc in linked:
        self.texte = link_document(
            self.texte, doc.id_document, doc.titre, doc.slug()
        )
    return linked
def slug(self, date: bool = False) -> str:
return slugify((self.date + "-" if date else "") + self.titre)
@ -187,26 +217,3 @@ class Rubrique(SpipRubriques):
# Query the DB to retrieve all sections sorted by publication date
def get_sections(limit: int = 10**6) -> ModelSelect:
    """Build the query selecting sections, newest first.

    Args:
        limit: Maximum number of rows the query may return.

    Returns:
        The ``ModelSelect`` query, ordered by descending ``Rubrique.date``.
    """
    newest_first = Rubrique.date.desc()
    query = Rubrique.select().order_by(newest_first)
    return query.limit(limit)
# Query the DB to retrieve all articles sorted by publication date
def get_articles(section_id: int, limit: int = 10**6) -> ModelSelect:
    """Build the query selecting the articles of one section, newest first.

    Args:
        section_id: Value compared against ``Article.id_rubrique``.
        limit: Maximum number of rows the query may return.

    Returns:
        The ``ModelSelect`` query, ordered by descending ``Article.date``.
    """
    in_section = Article.id_rubrique == section_id
    newest_first = Article.date.desc()
    query = Article.select().where(in_section)
    return query.order_by(newest_first).limit(limit)
# Query the DB to retrieve all documents related to object of id object_id
def get_documents(object_id: int, limit: int = 10**6) -> ModelSelect:
    """Build the query selecting the documents linked to one object id.

    Args:
        object_id: Value compared against ``SpipDocumentsLiens.id_objet``.
        limit: Maximum number of rows the query may return.

    Returns:
        The ``ModelSelect`` query over ``Document`` joined to the link table.
    """
    same_document = Document.id_document == SpipDocumentsLiens.id_document
    linked_to_object = SpipDocumentsLiens.id_objet == object_id
    query = Document.select().join(SpipDocumentsLiens, on=same_document)
    return query.where(linked_to_object).limit(limit)