more logging, some links still don’t convert

2023-05-30 15:22:39 +02:00 · 2023-05-30 15:22:39 +02:00 · 2ba94d03a8
commit 2ba94d03a8
parent 27c281db90
4 changed files with 68 additions and 43 deletions
--- a/spip2md/init.py
+++ b/spip2md/init.py
@ -1,6 +1,8 @@
 # SPIP website to plain Markdown files converter, Copyright (C) 2023 Guilhem Fauré
+import logging
 import sys
-from os import makedirs
+from os import makedirs, remove
+from os.path import isfile
 from shutil import rmtree

 from spip2md.config import CFG
@ -27,6 +29,18 @@ def count_output(
    return (branches, leaves)


+# Clear the previous log file if needed
+if CFG.clear_log and isfile(CFG.logfile):
+    remove(CFG.logfile)
+# Configure logging
+logging.basicConfig(
+    format="%(levelname)s:%(message)s",
+    filename=CFG.logfile,
+    encoding="utf-8",
+    level=CFG.loglevel,
+)
+
+
 # Connect to the MySQL database with Peewee ORM
 DB.init(CFG.db, host=CFG.db_host, user=CFG.db_user, password=CFG.db_pass)
 DB.connect()
@ -64,4 +78,4 @@ stored into {esc(BOLD)}{branches}{esc()} directories"""
    )

    # Warn about issued warnings in log file
-    print(f"\nThere might be warnings in {esc(BOLD)}{CFG.logfile}{esc()}")
+    print(f"\nThere might be warnings and infos in {esc(BOLD)}{CFG.logfile}{esc()}")
--- a/spip2md/config.py
+++ b/spip2md/config.py
@ -25,9 +25,10 @@ class Configuration:
    clear_log: bool = True
    prepend_h1: bool = True
    export_filetype: str = "md"
-    max_articles_export: int = 1000  # TODO reimplement with recursion
-    max_sections_export: int = 500  # TODO reimplement with recursion
    logfile: str = "spip2md.log"
+    loglevel: str = "INFO"
+    # max_articles_export: int = 1000  # TODO reimplement with recursion
+    # max_sections_export: int = 500  # TODO reimplement with recursion

    def __init__(self, config_file: Optional[str] = None):
        if config_file is not None:
--- a/spip2md/regexmap.py
+++ b/spip2md/regexmap.py
@ -137,6 +137,17 @@ ARTICLE_LINK = (
    ),
 )  # Name and path can be further replaced with .format()

+SECTION_LINK = (
+    (  # SPIP style section (rubrique) links
+        compile(r"<()(?:rub|rubrique)([0-9]+)(?:\|(.*?))?>", S | I),
+        r"[{}]({})",
+    ),
+    (  # Markdown style internal links
+        compile(r"\[(.*?)\]\((?:rub|rubrique)([0-9]+)(?:\|(.*?))?\)", S | I),
+        r"[\1{}]({})",
+    ),
+)  # Name and path can be further replaced with .format()
+
 # Multi language block, to be further processed per lang
 MULTILANG_BLOCK = compile(r"<multi>(.+?)<\/multi>", S | I)
 MULTILANGS = compile(
--- a/spip2md/spipobjects.py
+++ b/spip2md/spipobjects.py
@ -1,12 +1,12 @@
 # SPIP website to plain Markdown files converter, Copyright (C) 2023 Guilhem Fauré
 import logging
-from os import makedirs, remove
+from os import makedirs
 from os.path import basename, splitext
 from re import finditer, search
 from shutil import copyfile
 from typing import Any, Match, Optional

-from peewee import BigAutoField, DateTimeField, ModelSelect
+from peewee import BigAutoField, DateTimeField, DoesNotExist, ModelSelect
 from slugify import slugify
 from yaml import dump

@ -27,6 +27,7 @@ from spip2md.regexmap import (
    ISO_UTF,
    MULTILANG_BLOCK,
    MULTILANGS,
+    SECTION_LINK,
    SPECIAL_OUTPUT,
    SPIP_MARKDOWN,
    UNKNOWN_ISO,
@ -34,12 +35,6 @@ from spip2md.regexmap import (
 )
 from spip2md.style import BLUE, BOLD, GREEN, WARNING_STYLE, YELLOW, esc

-# Clear the previous log file if needed
-if CFG.clear_log:
-    remove(CFG.logfile)
-# Output logs to logfile
-logging.basicConfig(filename=CFG.logfile, encoding="utf-8")
-

 class SpipWritable:
    term_color: int
@ -63,10 +58,10 @@ class SpipWritable:
                    # Outputs the first lang associated text
                    first_lang = lang.group(2)
                else:
-                    pass
+                    title: str = first_lang[:40].strip(" \n")
+                    translate: str = lang.group(2)[:40].strip(" \n")
                    logging.warning(
-                        f"Ignored {lang.group(1)} translation of {first_lang[:40]}: "
-                        + lang.group(2)[:40],
+                        f"Ignored {lang.group(1)} translation of {title}: {translate}",
                    )
            return first_lang

@ -215,38 +210,44 @@ class SpipObject(SpipWritable):
    extra: str

    def convert(self, text: Optional[str], clean_html: bool = True) -> str:
+        def found_replace(path_link: str, doc: Any, text: str, match: Match) -> str:
+            repl: str = path_link.format(doc.titre, doc.filename())
+            logging.info(f"Translating link to {repl}")
+            return text.replace(match.group(), repl)
+
+        def not_found_warn(path_link: str, text: str, match: Match) -> str:
+            logging.warn(f"No object for link {match.group()} in {self.titre}")
+            return text.replace(match.group(), path_link.format("", "NOT FOUND"))
+
        if text is not None and len(text) > 0:
            for id_link, path_link in DOCUMENT_LINK:
                for match in id_link.finditer(text):
-                    doc: Document = Document.get(Document.id_document == match.group(2))
-                    if doc is not None:
-                        text = text.replace(
-                            match.group(), path_link.format(doc.titre, doc.filename())
-                        )
-                    else:
-                        logging.warn(
-                            f"No document for link {match.group()} in {self.titre}"
-                        )
-                        text = text.replace(
-                            match.group(), path_link.format("", "NOT FOUND")
+                    logging.info(f"Found document link {match.group()} in {self.titre}")
+                    try:
+                        doc: Document = Document.get(
+                            Document.id_document == match.group(2)
                        )
+                        text = found_replace(path_link, doc, text, match)
+                    except DoesNotExist:
+                        text = not_found_warn(path_link, text, match)
            for id_link, path_link in ARTICLE_LINK:
                for match in id_link.finditer(text):
+                    logging.info(f"Found article link {match.group()} in {self.titre}")
+                    try:
                        art: Article = Article.get(Article.id_article == match.group(2))
-                    if art is not None:
-                        text = text.replace(
-                            match.group(),
-                            path_link.format(
-                                art.titre, f"{art.dir_slug()}/{art.filename()}"
-                            ),
-                        )
-                    else:
-                        logging.warn(
-                            f"No article for link {match.group()} in {self.titre}"
-                        )
-                        text = text.replace(
-                            match.group(), path_link.format("", "NOT FOUND")
+                        text = found_replace(path_link, art, text, match)
+                    except DoesNotExist:
+                        text = not_found_warn(path_link, text, match)
+            for id_link, path_link in SECTION_LINK:
+                for match in id_link.finditer(text):
+                    logging.info(f"Found section link {match.group()} in {self.titre}")
+                    try:
+                        section: Rubrique = Rubrique.get(
+                            Rubrique.id_rubrique == match.group(2)
                        )
+                        text = found_replace(path_link, section, text, match)
+                    except DoesNotExist:
+                        text = not_found_warn(path_link, text, match)
        else:
            return ""
        return super().convert(text, clean_html)
@ -480,9 +481,7 @@ class RootRubrique(Rubrique):
        # self.object_id = 0
        self.profondeur = 0

-    def write_tree(
-        self, parent_dir: str, sections_limit: int = 0, articles_limit: int = 0
-    ) -> list[str | list]:
+    def write_tree(self, parent_dir: str) -> list[str | list]:
        # Define dictionary output to diplay
        output: list[str | list] = []
        # Print starting message