more logging, some links still don’t convert
This commit is contained in:
parent
27c281db90
commit
2ba94d03a8
@ -1,6 +1,8 @@
|
|||||||
# SPIP website to plain Markdown files converter, Copyright (C) 2023 Guilhem Fauré
|
# SPIP website to plain Markdown files converter, Copyright (C) 2023 Guilhem Fauré
|
||||||
|
import logging
|
||||||
import sys
|
import sys
|
||||||
from os import makedirs
|
from os import makedirs, remove
|
||||||
|
from os.path import isfile
|
||||||
from shutil import rmtree
|
from shutil import rmtree
|
||||||
|
|
||||||
from spip2md.config import CFG
|
from spip2md.config import CFG
|
||||||
@ -27,6 +29,18 @@ def count_output(
|
|||||||
return (branches, leaves)
|
return (branches, leaves)
|
||||||
|
|
||||||
|
|
||||||
|
# Clear the previous log file if needed
|
||||||
|
if CFG.clear_log and isfile(CFG.logfile):
|
||||||
|
remove(CFG.logfile)
|
||||||
|
# Configure logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format="%(levelname)s:%(message)s",
|
||||||
|
filename=CFG.logfile,
|
||||||
|
encoding="utf-8",
|
||||||
|
level=CFG.loglevel,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# Connect to the MySQL database with Peewee ORM
|
# Connect to the MySQL database with Peewee ORM
|
||||||
DB.init(CFG.db, host=CFG.db_host, user=CFG.db_user, password=CFG.db_pass)
|
DB.init(CFG.db, host=CFG.db_host, user=CFG.db_user, password=CFG.db_pass)
|
||||||
DB.connect()
|
DB.connect()
|
||||||
@ -64,4 +78,4 @@ stored into {esc(BOLD)}{branches}{esc()} directories"""
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Warn about issued warnings in log file
|
# Warn about issued warnings in log file
|
||||||
print(f"\nThere might be warnings in {esc(BOLD)}{CFG.logfile}{esc()}")
|
print(f"\nThere might be warnings and infos in {esc(BOLD)}{CFG.logfile}{esc()}")
|
||||||
|
@ -25,9 +25,10 @@ class Configuration:
|
|||||||
clear_log: bool = True
|
clear_log: bool = True
|
||||||
prepend_h1: bool = True
|
prepend_h1: bool = True
|
||||||
export_filetype: str = "md"
|
export_filetype: str = "md"
|
||||||
max_articles_export: int = 1000 # TODO reimplement with recursion
|
|
||||||
max_sections_export: int = 500 # TODO reimplement with recursion
|
|
||||||
logfile: str = "spip2md.log"
|
logfile: str = "spip2md.log"
|
||||||
|
loglevel: str = "INFO"
|
||||||
|
# max_articles_export: int = 1000 # TODO reimplement with recursion
|
||||||
|
# max_sections_export: int = 500 # TODO reimplement with recursion
|
||||||
|
|
||||||
def __init__(self, config_file: Optional[str] = None):
|
def __init__(self, config_file: Optional[str] = None):
|
||||||
if config_file is not None:
|
if config_file is not None:
|
||||||
|
@ -137,6 +137,17 @@ ARTICLE_LINK = (
|
|||||||
),
|
),
|
||||||
) # Name and path can be further replaced with .format()
|
) # Name and path can be further replaced with .format()
|
||||||
|
|
||||||
|
SECTION_LINK = (
|
||||||
|
( # SPIP style section links
|
||||||
|
compile(r"<()(?:rub|rubrique)([0-9]+)(?:\|(.*?))?>", S | I),
|
||||||
|
r"[{}]({})",
|
||||||
|
),
|
||||||
|
( # Markdown style internal links
|
||||||
|
compile(r"\[(.*?)\]\((?:rub|rubrique)([0-9]+)(?:\|(.*?))?\)", S | I),
|
||||||
|
r"[\1{}]({})",
|
||||||
|
),
|
||||||
|
) # Name and path can be further replaced with .format()
|
||||||
|
|
||||||
# Multi language block, to be further processed per lang
|
# Multi language block, to be further processed per lang
|
||||||
MULTILANG_BLOCK = compile(r"<multi>(.+?)<\/multi>", S | I)
|
MULTILANG_BLOCK = compile(r"<multi>(.+?)<\/multi>", S | I)
|
||||||
MULTILANGS = compile(
|
MULTILANGS = compile(
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
# SPIP website to plain Markdown files converter, Copyright (C) 2023 Guilhem Fauré
|
# SPIP website to plain Markdown files converter, Copyright (C) 2023 Guilhem Fauré
|
||||||
import logging
|
import logging
|
||||||
from os import makedirs, remove
|
from os import makedirs
|
||||||
from os.path import basename, splitext
|
from os.path import basename, splitext
|
||||||
from re import finditer, search
|
from re import finditer, search
|
||||||
from shutil import copyfile
|
from shutil import copyfile
|
||||||
from typing import Any, Match, Optional
|
from typing import Any, Match, Optional
|
||||||
|
|
||||||
from peewee import BigAutoField, DateTimeField, ModelSelect
|
from peewee import BigAutoField, DateTimeField, DoesNotExist, ModelSelect
|
||||||
from slugify import slugify
|
from slugify import slugify
|
||||||
from yaml import dump
|
from yaml import dump
|
||||||
|
|
||||||
@ -27,6 +27,7 @@ from spip2md.regexmap import (
|
|||||||
ISO_UTF,
|
ISO_UTF,
|
||||||
MULTILANG_BLOCK,
|
MULTILANG_BLOCK,
|
||||||
MULTILANGS,
|
MULTILANGS,
|
||||||
|
SECTION_LINK,
|
||||||
SPECIAL_OUTPUT,
|
SPECIAL_OUTPUT,
|
||||||
SPIP_MARKDOWN,
|
SPIP_MARKDOWN,
|
||||||
UNKNOWN_ISO,
|
UNKNOWN_ISO,
|
||||||
@ -34,12 +35,6 @@ from spip2md.regexmap import (
|
|||||||
)
|
)
|
||||||
from spip2md.style import BLUE, BOLD, GREEN, WARNING_STYLE, YELLOW, esc
|
from spip2md.style import BLUE, BOLD, GREEN, WARNING_STYLE, YELLOW, esc
|
||||||
|
|
||||||
# Clear the previous log file if needed
|
|
||||||
if CFG.clear_log:
|
|
||||||
remove(CFG.logfile)
|
|
||||||
# Output logs to logfile
|
|
||||||
logging.basicConfig(filename=CFG.logfile, encoding="utf-8")
|
|
||||||
|
|
||||||
|
|
||||||
class SpipWritable:
|
class SpipWritable:
|
||||||
term_color: int
|
term_color: int
|
||||||
@ -63,10 +58,10 @@ class SpipWritable:
|
|||||||
# Outputs the first lang associated text
|
# Outputs the first lang associated text
|
||||||
first_lang = lang.group(2)
|
first_lang = lang.group(2)
|
||||||
else:
|
else:
|
||||||
pass
|
title: str = first_lang[:40].strip(" \n")
|
||||||
|
translate: str = lang.group(2)[:40].strip(" \n")
|
||||||
logging.warning(
|
logging.warning(
|
||||||
f"Ignored {lang.group(1)} translation of {first_lang[:40]}: "
|
f"Ignored {lang.group(1)} translation of {title}: {translate}",
|
||||||
+ lang.group(2)[:40],
|
|
||||||
)
|
)
|
||||||
return first_lang
|
return first_lang
|
||||||
|
|
||||||
@ -215,38 +210,44 @@ class SpipObject(SpipWritable):
|
|||||||
extra: str
|
extra: str
|
||||||
|
|
||||||
def convert(self, text: Optional[str], clean_html: bool = True) -> str:
|
def convert(self, text: Optional[str], clean_html: bool = True) -> str:
|
||||||
|
def found_replace(path_link: str, doc: Any, text: str, match: Match) -> str:
    """Swap a matched link in `text` for its rendered replacement.

    `path_link` is a `str.format` template that receives `doc.titre` and
    `doc.filename()` as its two positional arguments. Every occurrence of
    the matched substring in `text` is replaced, and the new text returned.
    """
    # NOTE(review): regex backreferences (e.g. ``\1``) present in a template
    # are not expanded here, only the ``{}`` fields are — confirm intended.
    rendered: str = path_link.format(doc.titre, doc.filename())
    logging.info(f"Translating link to {rendered}")
    original_link: str = match.group(0)
    return text.replace(original_link, rendered)
|
||||||
|
|
||||||
|
def not_found_warn(path_link: str, text: str, match: Match) -> str:
    """Log an unresolved link and replace it with a "NOT FOUND" placeholder.

    `path_link` is a `str.format` template; it is rendered with an empty
    name and the literal path "NOT FOUND", and every occurrence of the
    matched substring in `text` is replaced by that rendering.

    `self` is not a parameter: it is read from the enclosing scope
    (presumably the `convert` method this helper is nested in).
    """
    # logging.warn is a deprecated alias of logging.warning, which the rest
    # of this file already uses.
    logging.warning(f"No object for link {match.group()} in {self.titre}")
    return text.replace(match.group(), path_link.format("", "NOT FOUND"))
|
||||||
|
|
||||||
if text is not None and len(text) > 0:
|
if text is not None and len(text) > 0:
|
||||||
for id_link, path_link in DOCUMENT_LINK:
|
for id_link, path_link in DOCUMENT_LINK:
|
||||||
for match in id_link.finditer(text):
|
for match in id_link.finditer(text):
|
||||||
doc: Document = Document.get(Document.id_document == match.group(2))
|
logging.info(f"Found document link {match.group()} in {self.titre}")
|
||||||
if doc is not None:
|
try:
|
||||||
text = text.replace(
|
doc: Document = Document.get(
|
||||||
match.group(), path_link.format(doc.titre, doc.filename())
|
Document.id_document == match.group(2)
|
||||||
)
|
|
||||||
else:
|
|
||||||
logging.warn(
|
|
||||||
f"No document for link {match.group()} in {self.titre}"
|
|
||||||
)
|
|
||||||
text = text.replace(
|
|
||||||
match.group(), path_link.format("", "NOT FOUND")
|
|
||||||
)
|
)
|
||||||
|
text = found_replace(path_link, doc, text, match)
|
||||||
|
except DoesNotExist:
|
||||||
|
text = not_found_warn(path_link, text, match)
|
||||||
for id_link, path_link in ARTICLE_LINK:
|
for id_link, path_link in ARTICLE_LINK:
|
||||||
for match in id_link.finditer(text):
|
for match in id_link.finditer(text):
|
||||||
|
logging.info(f"Found article link {match.group()} in {self.titre}")
|
||||||
|
try:
|
||||||
art: Article = Article.get(Article.id_article == match.group(2))
|
art: Article = Article.get(Article.id_article == match.group(2))
|
||||||
if art is not None:
|
text = found_replace(path_link, art, text, match)
|
||||||
text = text.replace(
|
except DoesNotExist:
|
||||||
match.group(),
|
text = not_found_warn(path_link, text, match)
|
||||||
path_link.format(
|
for id_link, path_link in SECTION_LINK:
|
||||||
art.titre, f"{art.dir_slug()}/{art.filename()}"
|
for match in id_link.finditer(text):
|
||||||
),
|
logging.info(f"Found section link {match.group()} in {self.titre}")
|
||||||
)
|
try:
|
||||||
else:
|
section: Rubrique = Rubrique.get(
|
||||||
logging.warn(
|
Rubrique.id_rubrique == match.group(2)
|
||||||
f"No article for link {match.group()} in {self.titre}"
|
|
||||||
)
|
|
||||||
text = text.replace(
|
|
||||||
match.group(), path_link.format("", "NOT FOUND")
|
|
||||||
)
|
)
|
||||||
|
text = found_replace(path_link, section, text, match)
|
||||||
|
except DoesNotExist:
|
||||||
|
text = not_found_warn(path_link, text, match)
|
||||||
else:
|
else:
|
||||||
return ""
|
return ""
|
||||||
return super().convert(text, clean_html)
|
return super().convert(text, clean_html)
|
||||||
@ -480,9 +481,7 @@ class RootRubrique(Rubrique):
|
|||||||
# self.object_id = 0
|
# self.object_id = 0
|
||||||
self.profondeur = 0
|
self.profondeur = 0
|
||||||
|
|
||||||
def write_tree(
|
def write_tree(self, parent_dir: str) -> list[str | list]:
|
||||||
self, parent_dir: str, sections_limit: int = 0, articles_limit: int = 0
|
|
||||||
) -> list[str | list]:
|
|
||||||
# Define the output list to display
|
# Define the output list to display
|
||||||
output: list[str | list] = []
|
output: list[str | list] = []
|
||||||
# Print starting message
|
# Print starting message
|
||||||
|
Loading…
Reference in New Issue
Block a user