more logging, some links still don’t convert

This commit is contained in:
Guilhem Fauré 2023-05-30 15:22:39 +02:00
parent 27c281db90
commit 2ba94d03a8
4 changed files with 68 additions and 43 deletions

View File

@ -1,6 +1,8 @@
# SPIP website to plain Markdown files converter, Copyright (C) 2023 Guilhem Fauré # SPIP website to plain Markdown files converter, Copyright (C) 2023 Guilhem Fauré
import logging
import sys import sys
from os import makedirs from os import makedirs, remove
from os.path import isfile
from shutil import rmtree from shutil import rmtree
from spip2md.config import CFG from spip2md.config import CFG
@ -27,6 +29,18 @@ def count_output(
return (branches, leaves) return (branches, leaves)
# Start from a fresh log file when the configuration asks for it.
# The existence check avoids an error on the very first run, when there is
# no previous log file to delete.
if CFG.clear_log:
    if isfile(CFG.logfile):
        remove(CFG.logfile)
# Send every log record to the configured log file, at the configured level
logging.basicConfig(
    filename=CFG.logfile,
    encoding="utf-8",
    level=CFG.loglevel,
    format="%(levelname)s:%(message)s",
)
# Connect to the MySQL database with Peewee ORM # Connect to the MySQL database with Peewee ORM
DB.init(CFG.db, host=CFG.db_host, user=CFG.db_user, password=CFG.db_pass) DB.init(CFG.db, host=CFG.db_host, user=CFG.db_user, password=CFG.db_pass)
DB.connect() DB.connect()
@ -64,4 +78,4 @@ stored into {esc(BOLD)}{branches}{esc()} directories"""
) )
# Warn about issued warnings in log file # Warn about issued warnings in log file
print(f"\nThere might be warnings in {esc(BOLD)}{CFG.logfile}{esc()}") print(f"\nThere might be warnings and infos in {esc(BOLD)}{CFG.logfile}{esc()}")

View File

@ -25,9 +25,10 @@ class Configuration:
clear_log: bool = True clear_log: bool = True
prepend_h1: bool = True prepend_h1: bool = True
export_filetype: str = "md" export_filetype: str = "md"
max_articles_export: int = 1000 # TODO reimplement with recursion
max_sections_export: int = 500 # TODO reimplement with recursion
logfile: str = "spip2md.log" logfile: str = "spip2md.log"
loglevel: str = "INFO"
# max_articles_export: int = 1000 # TODO reimplement with recursion
# max_sections_export: int = 500 # TODO reimplement with recursion
def __init__(self, config_file: Optional[str] = None): def __init__(self, config_file: Optional[str] = None):
if config_file is not None: if config_file is not None:

View File

@ -137,6 +137,17 @@ ARTICLE_LINK = (
), ),
) # Name and path can be further replaced with .format() ) # Name and path can be further replaced with .format()
# Pairs of (compiled pattern, replacement template) matching links to sections
# ("rub" / "rubrique" followed by a numeric id).
# In both patterns the numeric section id is capture group 2.
SECTION_LINK = (
    (  # SPIP style section links, e.g. <rub12> or <rubrique12|text>
        # The empty group 1 keeps the id at group 2, as in the Markdown pattern
        compile(r"<()(?:rub|rubrique)([0-9]+)(?:\|(.*?))?>", S | I),
        r"[{}]({})",
    ),
    (  # Markdown style internal links, e.g. [label](rub12)
        compile(r"\[(.*?)\]\((?:rub|rubrique)([0-9]+)(?:\|(.*?))?\)", S | I),
        # NOTE(review): "\1" is only expanded by re-style substitution; a
        # consumer that applies str.format() + str.replace() would emit it
        # literally - confirm how this template is consumed
        r"[\1{}]({})",
    ),
)  # Name and path can be further replaced with .format()
# Multi language block, to be further processed per lang # Multi language block, to be further processed per lang
MULTILANG_BLOCK = compile(r"<multi>(.+?)<\/multi>", S | I) MULTILANG_BLOCK = compile(r"<multi>(.+?)<\/multi>", S | I)
MULTILANGS = compile( MULTILANGS = compile(

View File

@ -1,12 +1,12 @@
# SPIP website to plain Markdown files converter, Copyright (C) 2023 Guilhem Fauré # SPIP website to plain Markdown files converter, Copyright (C) 2023 Guilhem Fauré
import logging import logging
from os import makedirs, remove from os import makedirs
from os.path import basename, splitext from os.path import basename, splitext
from re import finditer, search from re import finditer, search
from shutil import copyfile from shutil import copyfile
from typing import Any, Match, Optional from typing import Any, Match, Optional
from peewee import BigAutoField, DateTimeField, ModelSelect from peewee import BigAutoField, DateTimeField, DoesNotExist, ModelSelect
from slugify import slugify from slugify import slugify
from yaml import dump from yaml import dump
@ -27,6 +27,7 @@ from spip2md.regexmap import (
ISO_UTF, ISO_UTF,
MULTILANG_BLOCK, MULTILANG_BLOCK,
MULTILANGS, MULTILANGS,
SECTION_LINK,
SPECIAL_OUTPUT, SPECIAL_OUTPUT,
SPIP_MARKDOWN, SPIP_MARKDOWN,
UNKNOWN_ISO, UNKNOWN_ISO,
@ -34,12 +35,6 @@ from spip2md.regexmap import (
) )
from spip2md.style import BLUE, BOLD, GREEN, WARNING_STYLE, YELLOW, esc from spip2md.style import BLUE, BOLD, GREEN, WARNING_STYLE, YELLOW, esc
# Clear the previous log file if needed
if CFG.clear_log:
remove(CFG.logfile)
# Output logs to logfile
logging.basicConfig(filename=CFG.logfile, encoding="utf-8")
class SpipWritable: class SpipWritable:
term_color: int term_color: int
@ -63,10 +58,10 @@ class SpipWritable:
# Outputs the first lang associated text # Outputs the first lang associated text
first_lang = lang.group(2) first_lang = lang.group(2)
else: else:
pass title: str = first_lang[:40].strip(" \n")
translate: str = lang.group(2)[:40].strip(" \n")
logging.warning( logging.warning(
f"Ignored {lang.group(1)} translation of {first_lang[:40]}: " f"Ignored {lang.group(1)} translation of {title}: {translate}",
+ lang.group(2)[:40],
) )
return first_lang return first_lang
@ -215,38 +210,44 @@ class SpipObject(SpipWritable):
extra: str extra: str
def convert(self, text: Optional[str], clean_html: bool = True) -> str: def convert(self, text: Optional[str], clean_html: bool = True) -> str:
def found_replace(path_link: str, doc: Any, text: str, match: Match) -> str:
    """Replace a matched link in *text* with a link to the object found.

    path_link: template with two ``{}`` slots, filled with the object's
        title and its file name.
    doc: object found for the link; must expose ``titre`` and ``filename()``.
    text: text in which the link was matched.
    match: regex match whose whole matched string is replaced.

    Returns *text* with every occurrence of the matched string replaced.
    """
    rendered_link: str = path_link.format(doc.titre, doc.filename())
    logging.info(f"Translating link to {rendered_link}")
    matched_source: str = match.group()
    return text.replace(matched_source, rendered_link)
def not_found_warn(path_link: str, text: str, match: Match) -> str:
    """Log a warning for a link whose target does not exist and neutralise it.

    path_link: template with two ``{}`` slots; it is filled with an empty
        title and the placeholder path "NOT FOUND".
    text: text in which the link was matched.
    match: regex match whose whole matched string is replaced.

    Returns *text* with every occurrence of the matched string replaced.
    ``self`` is taken from the enclosing scope to name the object being
    converted in the log message.
    """
    # logging.warn is a deprecated alias; logging.warning is the supported
    # spelling and the one already used elsewhere in this file
    logging.warning(f"No object for link {match.group()} in {self.titre}")
    return text.replace(match.group(), path_link.format("", "NOT FOUND"))
if text is not None and len(text) > 0: if text is not None and len(text) > 0:
for id_link, path_link in DOCUMENT_LINK: for id_link, path_link in DOCUMENT_LINK:
for match in id_link.finditer(text): for match in id_link.finditer(text):
doc: Document = Document.get(Document.id_document == match.group(2)) logging.info(f"Found document link {match.group()} in {self.titre}")
if doc is not None: try:
text = text.replace( doc: Document = Document.get(
match.group(), path_link.format(doc.titre, doc.filename()) Document.id_document == match.group(2)
)
else:
logging.warn(
f"No document for link {match.group()} in {self.titre}"
)
text = text.replace(
match.group(), path_link.format("", "NOT FOUND")
) )
text = found_replace(path_link, doc, text, match)
except DoesNotExist:
text = not_found_warn(path_link, text, match)
for id_link, path_link in ARTICLE_LINK: for id_link, path_link in ARTICLE_LINK:
for match in id_link.finditer(text): for match in id_link.finditer(text):
logging.info(f"Found article link {match.group()} in {self.titre}")
try:
art: Article = Article.get(Article.id_article == match.group(2)) art: Article = Article.get(Article.id_article == match.group(2))
if art is not None: text = found_replace(path_link, art, text, match)
text = text.replace( except DoesNotExist:
match.group(), text = not_found_warn(path_link, text, match)
path_link.format( for id_link, path_link in SECTION_LINK:
art.titre, f"{art.dir_slug()}/{art.filename()}" for match in id_link.finditer(text):
), logging.info(f"Found section link {match.group()} in {self.titre}")
) try:
else: section: Rubrique = Rubrique.get(
logging.warn( Rubrique.id_rubrique == match.group(2)
f"No article for link {match.group()} in {self.titre}"
)
text = text.replace(
match.group(), path_link.format("", "NOT FOUND")
) )
text = found_replace(path_link, section, text, match)
except DoesNotExist:
text = not_found_warn(path_link, text, match)
else: else:
return "" return ""
return super().convert(text, clean_html) return super().convert(text, clean_html)
@ -480,9 +481,7 @@ class RootRubrique(Rubrique):
# self.object_id = 0 # self.object_id = 0
self.profondeur = 0 self.profondeur = 0
def write_tree( def write_tree(self, parent_dir: str) -> list[str | list]:
self, parent_dir: str, sections_limit: int = 0, articles_limit: int = 0
) -> list[str | list]:
# Define dictionary output to display
output: list[str | list] = [] output: list[str | list] = []
# Print starting message # Print starting message