more logging, some links still don’t convert

2023-05-30 15:22:39 +02:00 · 2023-05-30 15:22:39 +02:00 · 2ba94d03a8
commit 2ba94d03a8
parent 27c281db90
4 changed files with 68 additions and 43 deletions
--- a/spip2md/init.py
+++ b/spip2md/init.py
@ -1,6 +1,8 @@
 # SPIP website to plain Markdown files converter, Copyright (C) 2023 Guilhem Fauré
 import logging
 import sys
-from os import makedirs
+from os import makedirs, remove
 from os.path import isfile
 from shutil import rmtree
 from spip2md.config import CFG
@ -27,6 +29,18 @@ def count_output(
    return (branches, leaves)
 # Clear the previous log file if needed
 if CFG.clear_log and isfile(CFG.logfile):
    remove(CFG.logfile)
 # Configure logging
 logging.basicConfig(
    format="%(levelname)s:%(message)s",
    filename=CFG.logfile,
    encoding="utf-8",
    level=CFG.loglevel,
 )
 # Connect to the MySQL database with Peewee ORM
 DB.init(CFG.db, host=CFG.db_host, user=CFG.db_user, password=CFG.db_pass)
 DB.connect()
@ -64,4 +78,4 @@ stored into {esc(BOLD)}{branches}{esc()} directories"""
    )
    # Warn about issued warnings in log file
-    print(f"\nThere might be warnings in {esc(BOLD)}{CFG.logfile}{esc()}")
+    print(f"\nThere might be warnings and infos in {esc(BOLD)}{CFG.logfile}{esc()}")
--- a/spip2md/config.py
+++ b/spip2md/config.py
@ -25,9 +25,10 @@ class Configuration:
    clear_log: bool = True
    prepend_h1: bool = True
    export_filetype: str = "md"
    max_articles_export: int = 1000  # TODO reimplement with recursion
    max_sections_export: int = 500  # TODO reimplement with recursion
    logfile: str = "spip2md.log"
    loglevel: str = "INFO"
    # max_articles_export: int = 1000  # TODO reimplement with recursion
    # max_sections_export: int = 500  # TODO reimplement with recursion
    def __init__(self, config_file: Optional[str] = None):
        if config_file is not None:
--- a/spip2md/regexmap.py
+++ b/spip2md/regexmap.py
@ -137,6 +137,17 @@ ARTICLE_LINK = (
    ),
 )  # Name and path can be further replaced with .format()
 SECTION_LINK = (
    (  # SPIP style documents & embeds links
        compile(r"<()(?:rub|rubrique)([0-9]+)(?:\|(.*?))?>", S | I),
        r"[{}]({})",
    ),
    (  # Markdown style internal links
        compile(r"\[(.*?)\]\((?:rub|rubrique)([0-9]+)(?:\|(.*?))?\)", S | I),
        r"[\1{}]({})",
    ),
 )  # Name and path can be further replaced with .format()
 # Multi language block, to be further processed per lang
 MULTILANG_BLOCK = compile(r"<multi>(.+?)<\/multi>", S | I)
 MULTILANGS = compile(
--- a/spip2md/spipobjects.py
+++ b/spip2md/spipobjects.py
@ -1,12 +1,12 @@
 # SPIP website to plain Markdown files converter, Copyright (C) 2023 Guilhem Fauré
 import logging
-from os import makedirs, remove
+from os import makedirs
 from os.path import basename, splitext
 from re import finditer, search
 from shutil import copyfile
 from typing import Any, Match, Optional
-from peewee import BigAutoField, DateTimeField, ModelSelect
+from peewee import BigAutoField, DateTimeField, DoesNotExist, ModelSelect
 from slugify import slugify
 from yaml import dump
@ -27,6 +27,7 @@ from spip2md.regexmap import (
    ISO_UTF,
    MULTILANG_BLOCK,
    MULTILANGS,
    SECTION_LINK,
    SPECIAL_OUTPUT,
    SPIP_MARKDOWN,
    UNKNOWN_ISO,
@ -34,12 +35,6 @@ from spip2md.regexmap import (
 )
 from spip2md.style import BLUE, BOLD, GREEN, WARNING_STYLE, YELLOW, esc
 # Clear the previous log file if needed
 if CFG.clear_log:
    remove(CFG.logfile)
 # Output logs to logfile
 logging.basicConfig(filename=CFG.logfile, encoding="utf-8")
 class SpipWritable:
    term_color: int
@ -63,10 +58,10 @@ class SpipWritable:
                    # Outputs the first lang associated text
                    first_lang = lang.group(2)
                else:
-                    pass
+                    title: str = first_lang[:40].strip(" \n")
                    translate: str = lang.group(2)[:40].strip(" \n")
                    logging.warning(
-                        f"Ignored {lang.group(1)} translation of {first_lang[:40]}: "
+                        f"Ignored {lang.group(1)} translation of {title}: {translate}",
                        + lang.group(2)[:40],
                    )
            return first_lang
@ -215,38 +210,44 @@ class SpipObject(SpipWritable):
    extra: str
    def convert(self, text: Optional[str], clean_html: bool = True) -> str:
        def found_replace(path_link: str, doc: Any, text: str, match: Match) -> str:
            repl: str = path_link.format(doc.titre, doc.filename())
            logging.info(f"Translating link to {repl}")
            return text.replace(match.group(), repl)
        def not_found_warn(path_link: str, text: str, match: Match) -> str:
            logging.warn(f"No object for link {match.group()} in {self.titre}")
            return text.replace(match.group(), path_link.format("", "NOT FOUND"))
        if text is not None and len(text) > 0:
            for id_link, path_link in DOCUMENT_LINK:
                for match in id_link.finditer(text):
-                    doc: Document = Document.get(Document.id_document == match.group(2))
+                    logging.info(f"Found document link {match.group()} in {self.titre}")
-                    if doc is not None:
+                    try:
-                        text = text.replace(
+                        doc: Document = Document.get(
-                            match.group(), path_link.format(doc.titre, doc.filename())
+                            Document.id_document == match.group(2)
                        )
                    else:
                        logging.warn(
                            f"No document for link {match.group()} in {self.titre}"
                        )
                        text = text.replace(
                            match.group(), path_link.format("", "NOT FOUND")
                        )
                        text = found_replace(path_link, doc, text, match)
                    except DoesNotExist:
                        text = not_found_warn(path_link, text, match)
            for id_link, path_link in ARTICLE_LINK:
                for match in id_link.finditer(text):
                    logging.info(f"Found article link {match.group()} in {self.titre}")
                    try:
                        art: Article = Article.get(Article.id_article == match.group(2))
-                    if art is not None:
+                        text = found_replace(path_link, art, text, match)
-                        text = text.replace(
+                    except DoesNotExist:
-                            match.group(),
+                        text = not_found_warn(path_link, text, match)
-                            path_link.format(
+            for id_link, path_link in SECTION_LINK:
-                                art.titre, f"{art.dir_slug()}/{art.filename()}"
+                for match in id_link.finditer(text):
-                            ),
+                    logging.info(f"Found section link {match.group()} in {self.titre}")
-                        )
+                    try:
-                    else:
+                        section: Rubrique = Rubrique.get(
-                        logging.warn(
+                            Rubrique.id_rubrique == match.group(2)
                            f"No article for link {match.group()} in {self.titre}"
                        )
                        text = text.replace(
                            match.group(), path_link.format("", "NOT FOUND")
                        )
                        text = found_replace(path_link, section, text, match)
                    except DoesNotExist:
                        text = not_found_warn(path_link, text, match)
        else:
            return ""
        return super().convert(text, clean_html)
@ -480,9 +481,7 @@ class RootRubrique(Rubrique):
        # self.object_id = 0
        self.profondeur = 0
-    def write_tree(
+    def write_tree(self, parent_dir: str) -> list[str | list]:
        self, parent_dir: str, sections_limit: int = 0, articles_limit: int = 0
    ) -> list[str | list]:
        # Define dictionary output to display
        output: list[str | list] = []
        # Print starting message