more logging, some links still don’t convert
This commit is contained in:
parent
27c281db90
commit
2ba94d03a8
@ -1,6 +1,8 @@
|
||||
# SPIP website to plain Markdown files converter, Copyright (C) 2023 Guilhem Fauré
|
||||
import logging
|
||||
import sys
|
||||
from os import makedirs
|
||||
from os import makedirs, remove
|
||||
from os.path import isfile
|
||||
from shutil import rmtree
|
||||
|
||||
from spip2md.config import CFG
|
||||
@ -27,6 +29,18 @@ def count_output(
|
||||
return (branches, leaves)
|
||||
|
||||
|
||||
# Clear the previous log file if needed
|
||||
if CFG.clear_log and isfile(CFG.logfile):
|
||||
remove(CFG.logfile)
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
format="%(levelname)s:%(message)s",
|
||||
filename=CFG.logfile,
|
||||
encoding="utf-8",
|
||||
level=CFG.loglevel,
|
||||
)
|
||||
|
||||
|
||||
# Connect to the MySQL database with Peewee ORM
|
||||
DB.init(CFG.db, host=CFG.db_host, user=CFG.db_user, password=CFG.db_pass)
|
||||
DB.connect()
|
||||
@ -64,4 +78,4 @@ stored into {esc(BOLD)}{branches}{esc()} directories"""
|
||||
)
|
||||
|
||||
# Tell the user that the log file may contain warnings
|
||||
print(f"\nThere might be warnings in {esc(BOLD)}{CFG.logfile}{esc()}")
|
||||
print(f"\nThere might be warnings and infos in {esc(BOLD)}{CFG.logfile}{esc()}")
|
||||
|
@ -25,9 +25,10 @@ class Configuration:
|
||||
clear_log: bool = True
|
||||
prepend_h1: bool = True
|
||||
export_filetype: str = "md"
|
||||
max_articles_export: int = 1000 # TODO reimplement with recursion
|
||||
max_sections_export: int = 500 # TODO reimplement with recursion
|
||||
logfile: str = "spip2md.log"
|
||||
loglevel: str = "INFO"
|
||||
# max_articles_export: int = 1000 # TODO reimplement with recursion
|
||||
# max_sections_export: int = 500 # TODO reimplement with recursion
|
||||
|
||||
def __init__(self, config_file: Optional[str] = None):
|
||||
if config_file is not None:
|
||||
|
@ -137,6 +137,17 @@ ARTICLE_LINK = (
|
||||
),
|
||||
) # Name and path can be further replaced with .format()
|
||||
|
||||
SECTION_LINK = (
|
||||
( # SPIP style section (rubrique) links
|
||||
compile(r"<()(?:rub|rubrique)([0-9]+)(?:\|(.*?))?>", S | I),
|
||||
r"[{}]({})",
|
||||
),
|
||||
( # Markdown style internal links
|
||||
compile(r"\[(.*?)\]\((?:rub|rubrique)([0-9]+)(?:\|(.*?))?\)", S | I),
|
||||
r"[\1{}]({})",
|
||||
),
|
||||
) # Name and path can be further replaced with .format()
|
||||
|
||||
# Multi language block, to be further processed per lang
|
||||
MULTILANG_BLOCK = compile(r"<multi>(.+?)<\/multi>", S | I)
|
||||
MULTILANGS = compile(
|
||||
|
@ -1,12 +1,12 @@
|
||||
# SPIP website to plain Markdown files converter, Copyright (C) 2023 Guilhem Fauré
|
||||
import logging
|
||||
from os import makedirs, remove
|
||||
from os import makedirs
|
||||
from os.path import basename, splitext
|
||||
from re import finditer, search
|
||||
from shutil import copyfile
|
||||
from typing import Any, Match, Optional
|
||||
|
||||
from peewee import BigAutoField, DateTimeField, ModelSelect
|
||||
from peewee import BigAutoField, DateTimeField, DoesNotExist, ModelSelect
|
||||
from slugify import slugify
|
||||
from yaml import dump
|
||||
|
||||
@ -27,6 +27,7 @@ from spip2md.regexmap import (
|
||||
ISO_UTF,
|
||||
MULTILANG_BLOCK,
|
||||
MULTILANGS,
|
||||
SECTION_LINK,
|
||||
SPECIAL_OUTPUT,
|
||||
SPIP_MARKDOWN,
|
||||
UNKNOWN_ISO,
|
||||
@ -34,12 +35,6 @@ from spip2md.regexmap import (
|
||||
)
|
||||
from spip2md.style import BLUE, BOLD, GREEN, WARNING_STYLE, YELLOW, esc
|
||||
|
||||
# Clear the previous log file if needed
|
||||
if CFG.clear_log:
|
||||
remove(CFG.logfile)
|
||||
# Output logs to logfile
|
||||
logging.basicConfig(filename=CFG.logfile, encoding="utf-8")
|
||||
|
||||
|
||||
class SpipWritable:
|
||||
term_color: int
|
||||
@ -63,10 +58,10 @@ class SpipWritable:
|
||||
# Output the text associated with the first language
|
||||
first_lang = lang.group(2)
|
||||
else:
|
||||
pass
|
||||
title: str = first_lang[:40].strip(" \n")
|
||||
translate: str = lang.group(2)[:40].strip(" \n")
|
||||
logging.warning(
|
||||
f"Ignored {lang.group(1)} translation of {first_lang[:40]}: "
|
||||
+ lang.group(2)[:40],
|
||||
f"Ignored {lang.group(1)} translation of {title}: {translate}",
|
||||
)
|
||||
return first_lang
|
||||
|
||||
@ -215,38 +210,44 @@ class SpipObject(SpipWritable):
|
||||
extra: str
|
||||
|
||||
def convert(self, text: Optional[str], clean_html: bool = True) -> str:
|
||||
def found_replace(path_link: str, doc: Any, text: str, match: Match) -> str:
|
||||
repl: str = path_link.format(doc.titre, doc.filename())
|
||||
logging.info(f"Translating link to {repl}")
|
||||
return text.replace(match.group(), repl)
|
||||
|
||||
def not_found_warn(path_link: str, text: str, match: Match) -> str:
|
||||
logging.warn(f"No object for link {match.group()} in {self.titre}")
|
||||
return text.replace(match.group(), path_link.format("", "NOT FOUND"))
|
||||
|
||||
if text is not None and len(text) > 0:
|
||||
for id_link, path_link in DOCUMENT_LINK:
|
||||
for match in id_link.finditer(text):
|
||||
doc: Document = Document.get(Document.id_document == match.group(2))
|
||||
if doc is not None:
|
||||
text = text.replace(
|
||||
match.group(), path_link.format(doc.titre, doc.filename())
|
||||
)
|
||||
else:
|
||||
logging.warn(
|
||||
f"No document for link {match.group()} in {self.titre}"
|
||||
)
|
||||
text = text.replace(
|
||||
match.group(), path_link.format("", "NOT FOUND")
|
||||
logging.info(f"Found document link {match.group()} in {self.titre}")
|
||||
try:
|
||||
doc: Document = Document.get(
|
||||
Document.id_document == match.group(2)
|
||||
)
|
||||
text = found_replace(path_link, doc, text, match)
|
||||
except DoesNotExist:
|
||||
text = not_found_warn(path_link, text, match)
|
||||
for id_link, path_link in ARTICLE_LINK:
|
||||
for match in id_link.finditer(text):
|
||||
logging.info(f"Found article link {match.group()} in {self.titre}")
|
||||
try:
|
||||
art: Article = Article.get(Article.id_article == match.group(2))
|
||||
if art is not None:
|
||||
text = text.replace(
|
||||
match.group(),
|
||||
path_link.format(
|
||||
art.titre, f"{art.dir_slug()}/{art.filename()}"
|
||||
),
|
||||
)
|
||||
else:
|
||||
logging.warn(
|
||||
f"No article for link {match.group()} in {self.titre}"
|
||||
)
|
||||
text = text.replace(
|
||||
match.group(), path_link.format("", "NOT FOUND")
|
||||
text = found_replace(path_link, art, text, match)
|
||||
except DoesNotExist:
|
||||
text = not_found_warn(path_link, text, match)
|
||||
for id_link, path_link in SECTION_LINK:
|
||||
for match in id_link.finditer(text):
|
||||
logging.info(f"Found section link {match.group()} in {self.titre}")
|
||||
try:
|
||||
section: Rubrique = Rubrique.get(
|
||||
Rubrique.id_rubrique == match.group(2)
|
||||
)
|
||||
text = found_replace(path_link, section, text, match)
|
||||
except DoesNotExist:
|
||||
text = not_found_warn(path_link, text, match)
|
||||
else:
|
||||
return ""
|
||||
return super().convert(text, clean_html)
|
||||
@ -480,9 +481,7 @@ class RootRubrique(Rubrique):
|
||||
# self.object_id = 0
|
||||
self.profondeur = 0
|
||||
|
||||
def write_tree(
|
||||
self, parent_dir: str, sections_limit: int = 0, articles_limit: int = 0
|
||||
) -> list[str | list]:
|
||||
def write_tree(self, parent_dir: str) -> list[str | list]:
|
||||
# Define the output list to display
|
||||
output: list[str | list] = []
|
||||
# Print starting message
|
||||
|
Loading…
Reference in New Issue
Block a user