links documents and images

This commit is contained in:
Guilhem Fauré 2023-05-24 13:37:59 +02:00
parent a71302c5ed
commit 27938f92b9
3 changed files with 75 additions and 63 deletions

View File

@ -231,6 +231,10 @@ ISO_TO_UTF = (
"†",
r"",
),
( # Remove Windows style line feed
"\r",
r"",
),
)
# WARNING unknown broken encoding
@ -256,11 +260,10 @@ def convert(text: Optional[str], clean_meta: bool = False) -> str:
# Replace images & files links in Markdown with real slugs of the actually linked files
def link_documents(text: str, documents: list[tuple[int, str, str]]) -> str:
for id, name, slug in documents:
def link_document(text: str, id: int, name: str, slug: str) -> str:
# Replace images that don't have a title written in text
text = sub(
r"\[]\((?:img|image)" + str(id) + r"(\|.*?)*\)",
r"!\[]\((?:img|image)" + str(id) + r"(\|.*?)*\)",
f"![{name}]({slug})",
text,
)
@ -272,7 +275,7 @@ def link_documents(text: str, documents: list[tuple[int, str, str]]) -> str:
)
# Replace images that already had a title in Markdown style link
text = sub(
r"\[(.+?)\]\((?:img|image)" + str(id) + r"(\|.*?)*\)",
r"!\[(.+?)\]\((?:img|image)" + str(id) + r"(\|.*?)*\)",
f"![\\1]({slug})",
text,
)

View File

@ -12,7 +12,6 @@ from spipobjects import (
Document,
Rubrique,
get_articles,
get_documents,
get_sections,
)
@ -205,25 +204,28 @@ if __name__ == "__main__":
# Loop among sections & export them
for i, section in enumerate(sections):
# Write the section & store its articles
# Get the section's documents & link them
documents = section.documents()
# Write the section and store its output directory
sectiondir = write_section(i, nb_sections_export, section)
# Loop over the section's related files (images …)
documents = get_documents(section.id_rubrique)
# Loop over the section's related documents (images …)
for i, document in enumerate(documents):
write_document(i, len(documents), document, sectiondir)
# Loop over sections articles
articles = get_articles(section.id_rubrique, (max_articles_export))
for i, article in enumerate(articles):
# Get the article's documents & link them
documents = article.documents()
# Write the article and store its output directory
articledir = write_article(i, len(articles), article, sectiondir)
# Add article to unknown_chars_articles if needed
if has_unknown_chars(article):
unknown_chars_articles.append(article)
# Decrement export limit
max_articles_export -= 1
# Loop over the article's related files (images …)
documents = get_documents(article.id_article)
# Loop over the article's related documents (images …)
for i, document in enumerate(documents):
write_document(i, len(documents), document, sectiondir, 2)
write_document(i, len(documents), document, articledir, 2)
# Break line when finished exporting the section
print()

View File

@ -4,7 +4,7 @@ from peewee import ModelSelect
from slugify import slugify
from yaml import dump
from converters import convert
from converters import convert, link_document
from database import (
SpipArticles,
SpipAuteurs,
@ -16,12 +16,6 @@ from database import (
EXPORTTYPE: str = "md"
# Convert images & files links
# text: str = convert_documents(
# self.texte,
# [(d.id, d.titre, d.slug()) for d in self.documents()],
# )
class Document(SpipDocuments):
class Meta:
@ -60,6 +54,19 @@ class Article(SpipArticles):
self.ps: str = convert(self.ps) # Probably unused
self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false"
def documents(self) -> ModelSelect:
documents = (
Document.select()
.join(
SpipDocumentsLiens,
on=(Document.id_document == SpipDocumentsLiens.id_document),
)
.where(SpipDocumentsLiens.id_objet == self.id_article)
)
for d in documents:
self.texte = link_document(self.texte, d.id_document, d.titre, d.slug())
return documents
def slug(self, date: bool = False) -> str:
return slugify((self.date + "-" if date else "") + self.titre)
@ -129,6 +136,16 @@ class Article(SpipArticles):
)
# Query the DB to retrieve all articles sorted by publication date
def get_articles(section_id: int, limit: int = 10**6) -> ModelSelect:
return (
Article.select()
.where(Article.id_rubrique == section_id)
.order_by(Article.date.desc())
.limit(limit)
)
class Rubrique(SpipRubriques):
class Meta:
table_name: str = "spip_rubriques"
@ -142,6 +159,19 @@ class Rubrique(SpipRubriques):
self.langue_choisie: str = "false" if self.langue_choisie == "oui" else "true"
self.extra: str = convert(self.extra) # Probably unused
def documents(self) -> ModelSelect:
documents = (
Document.select()
.join(
SpipDocumentsLiens,
on=(Document.id_document == SpipDocumentsLiens.id_document),
)
.where(SpipDocumentsLiens.id_objet == self.id_rubrique)
)
for d in documents:
self.texte = link_document(self.texte, d.id_document, d.titre, d.slug())
return documents
def slug(self, date: bool = False) -> str:
return slugify((self.date + "-" if date else "") + self.titre)
@ -187,26 +217,3 @@ class Rubrique(SpipRubriques):
# Query the DB to retrieve all sections sorted by publication date
def get_sections(limit: int = 10**6) -> ModelSelect:
return Rubrique.select().order_by(Rubrique.date.desc()).limit(limit)
# Query the DB to retrieve all articles sorted by publication date
def get_articles(section_id: int, limit: int = 10**6) -> ModelSelect:
return (
Article.select()
.where(Article.id_rubrique == section_id)
.order_by(Article.date.desc())
.limit(limit)
)
# Query the DB to retrieve all documents related to object of id object_id
def get_documents(object_id: int, limit: int = 10**6) -> ModelSelect:
return (
Document.select()
.join(
SpipDocumentsLiens,
on=(Document.id_document == SpipDocumentsLiens.id_document),
)
.where(SpipDocumentsLiens.id_objet == object_id)
.limit(limit)
)