links documents and images

2023-05-24 13:37:59 +02:00 · 2023-05-24 13:37:59 +02:00 · 27938f92b9
commit 27938f92b9
parent a71302c5ed
3 changed files with 75 additions and 63 deletions
--- a/spip2md/converters.py
+++ b/spip2md/converters.py
@ -231,6 +231,10 @@ ISO_TO_UTF = (
        "â€ ",
        r"† ",
    ),
    (  # Remove Windows style line feed
        "\r",
        r"",
    ),
 )
 # WARNING unknown broken encoding
@ -256,32 +260,31 @@ def convert(text: Optional[str], clean_meta: bool = False) -> str:
 # Replace images & files links in Markdown with real slugs of the actually linked files
-def link_documents(text: str, documents: list[tuple[int, str, str]]) -> str:
+def link_document(text: str, id: int, name: str, slug: str) -> str:
-    for id, name, slug in documents:
+    # Replace images that don't have a title written in text
-        # Replace images that dont have a title written in text
+    text = sub(
-        text = sub(
+        r"!\[]\((?:img|image)" + str(id) + r"(\|.*?)*\)",
-            r"\[]\((?:img|image)" + str(id) + r"(\|.*?)*\)",
+        f"![{name}]({slug})",
-            f"![{name}]({slug})",
+        text,
-            text,
+    )
-        )
+    # Replace documents that don't have a title written in text
-        # Replace images that dont have a title written in text
+    text = sub(
-        text = sub(
+        r"\[]\((?:doc|document|emb)" + str(id) + r"(\|.*?)*\)",
-            r"\[]\((?:doc|document|emb)" + str(id) + r"(\|.*?)*\)",
+        f"[{name}]({slug})",
-            f"[{name}]({slug})",
+        text,
-            text,
+    )
-        )
+    # Replace images that already had a title in Markdown style link
-        # Replace images that already had a title in Markdown style link
+    text = sub(
-        text = sub(
+        r"!\[(.+?)\]\((?:img|image)" + str(id) + r"(\|.*?)*\)",
-            r"\[(.+?)\]\((?:img|image)" + str(id) + r"(\|.*?)*\)",
+        f"![\\1]({slug})",
-            f"![\\1]({slug})",
+        text,
-            text,
+    )
-        )
+    # Replace documents that already had a title in Markdown style link
-        # Replace documents that already had a title in Markdown style link
+    text = sub(
-        text = sub(
+        r"\[(.+?)\]\((?:doc|document|emb)" + str(id) + r"(\|.*?)*\)",
-            r"\[(.+?)\]\((?:doc|document|emb)" + str(id) + r"(\|.*?)*\)",
+        f"[\\1]({slug})",
-            f"[\\1]({slug})",
+        text,
-            text,
+    )
        )
    return text
--- a/spip2md/main.py
+++ b/spip2md/main.py
@ -12,7 +12,6 @@ from spipobjects import (
    Document,
    Rubrique,
    get_articles,
    get_documents,
    get_sections,
 )
@ -205,25 +204,28 @@ if __name__ == "__main__":
    # Loop among sections & export them
    for i, section in enumerate(sections):
-        # Write the section & store its articles
+        # Get section’s documents & link them
        documents = section.documents()
        # Write the section and store its output directory
        sectiondir = write_section(i, nb_sections_export, section)
-        # Loop over section’s related files (images …)
+        # Loop over section’s related documents (images …)
        documents = get_documents(section.id_rubrique)
        for i, document in enumerate(documents):
            write_document(i, len(documents), document, sectiondir)
        # Loop over section’s articles
        articles = get_articles(section.id_rubrique, (max_articles_export))
        for i, article in enumerate(articles):
            # Get article’s documents & link them
            documents = article.documents()
            # Write the article and store its output directory
            articledir = write_article(i, len(articles), article, sectiondir)
            # Add article to unknown_chars_articles if needed
            if has_unknown_chars(article):
                unknown_chars_articles.append(article)
            # Decrement export limit
            max_articles_export -= 1
-            # Loop over article’s related files (images …)
+            # Loop over article’s related documents (images …)
            documents = get_documents(article.id_article)
            for i, document in enumerate(documents):
-                write_document(i, len(documents), document, sectiondir, 2)
+                write_document(i, len(documents), document, articledir, 2)
        # Break line when finished exporting the section
        print()
--- a/spip2md/spipobjects.py
+++ b/spip2md/spipobjects.py
@ -4,7 +4,7 @@ from peewee import ModelSelect
 from slugify import slugify
 from yaml import dump
-from converters import convert
+from converters import convert, link_document
 from database import (
    SpipArticles,
    SpipAuteurs,
@ -16,12 +16,6 @@ from database import (
 EXPORTTYPE: str = "md"
 # Convert images & files links
 # text: str = convert_documents(
 #     self.texte,
 #     [(d.id, d.titre, d.slug()) for d in self.documents()],
 # )
 class Document(SpipDocuments):
    class Meta:
@ -60,6 +54,19 @@ class Article(SpipArticles):
        self.ps: str = convert(self.ps)  # Probably unused
        self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false"
    def documents(self) -> ModelSelect:
        # Select every Document joined to a SpipDocumentsLiens row whose
        # id_objet equals this article's id_article.
        # NOTE(review): only id_objet is compared; if the links table also
        # records the kind of linked object, it is not filtered here — confirm.
        same_document = Document.id_document == SpipDocumentsLiens.id_document
        linked = (
            Document.select()
            .join(SpipDocumentsLiens, on=same_document)
            .where(SpipDocumentsLiens.id_objet == self.id_article)
        )
        # Side effect: self.texte is rewritten once per linked document, passing
        # the document's id, title and slug to link_document.
        for doc in linked:
            self.texte = link_document(
                self.texte, doc.id_document, doc.titre, doc.slug()
            )
        # The query object itself is returned, not a materialized list
        return linked
    def slug(self, date: bool = False) -> str:
        # Slugify the title, prefixed with "<self.date>-" when `date` is true
        date_prefix = self.date + "-" if date else ""
        return slugify(date_prefix + self.titre)
@ -129,6 +136,16 @@ class Article(SpipArticles):
        )
 # Build the query for the articles of section `section_id`, ordered by date
 # descending and capped at `limit` rows
 def get_articles(section_id: int, limit: int = 10**6) -> ModelSelect:
    in_section = Article.id_rubrique == section_id
    by_date_desc = Article.date.desc()
    return Article.select().where(in_section).order_by(by_date_desc).limit(limit)
 class Rubrique(SpipRubriques):
    class Meta:
        table_name: str = "spip_rubriques"
@ -142,6 +159,19 @@ class Rubrique(SpipRubriques):
        self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true"
        self.extra: str = convert(self.extra)  # Probably unused
    def documents(self) -> ModelSelect:
        # Select every Document joined to a SpipDocumentsLiens row whose
        # id_objet equals this section's id_rubrique.
        # NOTE(review): only id_objet is compared; if the links table also
        # records the kind of linked object, it is not filtered here — confirm.
        same_document = Document.id_document == SpipDocumentsLiens.id_document
        linked = (
            Document.select()
            .join(SpipDocumentsLiens, on=same_document)
            .where(SpipDocumentsLiens.id_objet == self.id_rubrique)
        )
        # Side effect: self.texte is rewritten once per linked document, passing
        # the document's id, title and slug to link_document.
        for doc in linked:
            self.texte = link_document(
                self.texte, doc.id_document, doc.titre, doc.slug()
            )
        # The query object itself is returned, not a materialized list
        return linked
    def slug(self, date: bool = False) -> str:
        # Slugify the title, prefixed with "<self.date>-" when `date` is true
        date_prefix = self.date + "-" if date else ""
        return slugify(date_prefix + self.titre)
@ -187,26 +217,3 @@ class Rubrique(SpipRubriques):
 # Build the query for all sections, ordered by date descending and capped at
 # `limit` rows
 def get_sections(limit: int = 10**6) -> ModelSelect:
    by_date_desc = Rubrique.date.desc()
    sections = Rubrique.select().order_by(by_date_desc)
    return sections.limit(limit)
 # Build the query for the articles of section `section_id`, ordered by date
 # descending and capped at `limit` rows
 def get_articles(section_id: int, limit: int = 10**6) -> ModelSelect:
    in_section = Article.id_rubrique == section_id
    by_date_desc = Article.date.desc()
    return Article.select().where(in_section).order_by(by_date_desc).limit(limit)
 # Build the query for the documents joined to a SpipDocumentsLiens row whose
 # id_objet equals `object_id`, capped at `limit` rows
 def get_documents(object_id: int, limit: int = 10**6) -> ModelSelect:
    same_document = Document.id_document == SpipDocumentsLiens.id_document
    linked = Document.select().join(SpipDocumentsLiens, on=same_document)
    return linked.where(SpipDocumentsLiens.id_objet == object_id).limit(limit)