cleaning, fixed indentation, styling
This commit is contained in:
parent
952595b34c
commit
cc549db945
@ -3,12 +3,10 @@
|
||||
import sys
|
||||
from os import makedirs
|
||||
from shutil import rmtree
|
||||
from typing import Any
|
||||
|
||||
from peewee import ModelSelect
|
||||
|
||||
from spip2md.config import CFG
|
||||
from spip2md.database import DB
|
||||
from spip2md.regexmap import SPECIAL_OUTPUT
|
||||
from spip2md.spipobjects import RootRubrique, Rubrique
|
||||
|
||||
# Define styles
|
||||
@ -25,66 +23,25 @@ C1 = 96 # Color
|
||||
C2 = 96 # Color
|
||||
|
||||
|
||||
# Print a stylized string, without trailing newline
|
||||
def style(string: str, *args: int, end: str = "") -> None:
|
||||
esc = "\033[" # Terminal escape sequence, needs to be closed by "m"
|
||||
# Terminal escape sequence
|
||||
def esc(*args: int) -> str:
|
||||
if len(args) == 0:
|
||||
params: str = "1;" # Defaults to bold
|
||||
params: str = "0;" # Defaults to reset
|
||||
else:
|
||||
params: str = ""
|
||||
# Build a string from args, that will be stripped from its trailing ;
|
||||
for a in args:
|
||||
params += str(a) + ";"
|
||||
print(esc + params[:-1] + "m" + string + esc + "0m", end=end)
|
||||
|
||||
|
||||
# Print a string, highlighting every substring starting at start_stop[x][0] …
|
||||
def highlight(string: str, *start_stop: tuple[int, int], end: str = "") -> None:
    """Print ``string``, routing each ``(start, stop)`` slice through ``style``.

    Text lying between the given ranges is printed unchanged with no trailing
    newline. Each ``string[start:stop]`` slice is handed to ``style`` together
    with ``BOLD`` and ``RED``. Ranges are consumed in the order they are given.

    Args:
        string: The text to print.
        *start_stop: ``(start, stop)`` index pairs delimiting the slices to
            pass to ``style``.
        end: Terminator appended after the final unstyled segment.
    """
    cursor = 0  # Index just past the last styled slice printed so far
    for begin, finish in start_stop:
        # Plain text between the previous slice and this one
        plain = string[cursor:begin]
        print(plain, end="")
        # The slice itself, delegated to the styling helper
        style(string[begin:finish], BOLD, RED)
        cursor = finish
    # Whatever remains after the last slice, followed by the caller's terminator
    print(string[cursor:], end=end)
|
||||
|
||||
|
||||
# Query the DB to retrieve all sections without parent, sorted by publication date
|
||||
def root_sections(limit: int = 10**3) -> ModelSelect:
    """Build the query selecting sections that have no parent.

    Selects ``Rubrique`` rows whose ``id_parent`` equals 0, ordered by
    ``date`` descending, and capped at ``limit`` rows.

    Args:
        limit: Maximum number of rows the query may return.

    Returns:
        The resulting ``ModelSelect`` query object.
    """
    query = Rubrique.select()
    query = query.where(Rubrique.id_parent == 0)  # Parent ID 0: no parent
    query = query.order_by(Rubrique.date.desc())  # Most recent first
    return query.limit(limit)
|
||||
|
||||
|
||||
r"""
|
||||
# Print the detected unknown chars in article in their context but highlighted
|
||||
def warn_unknown_chars(article: Article) -> None:
|
||||
# Print the title of the article in which there are unknown characters
|
||||
# & the number of them
|
||||
unknown_chars_apparitions: list[str] = unknown_chars_context(article.texte)
|
||||
nb: int = len(unknown_chars_apparitions)
|
||||
s: str = "s" if nb > 1 else ""
|
||||
style(f"{nb}")
|
||||
print(f" unknown character{s} in", end="")
|
||||
style(f" {article.lang} ")
|
||||
highlight(article.titre, *unknown_chars(article.titre))
|
||||
print() # Break line
|
||||
# Print the context in which the unknown characters are found
|
||||
for text in unknown_chars_apparitions:
|
||||
style(" … ")
|
||||
highlight(text, *unknown_chars(text))
|
||||
style(" … \n")
|
||||
print() # Break line
|
||||
"""
|
||||
# Base terminal escape sequence that needs to be closed by "m"
|
||||
return "\033[" + params[:-1] + "m"
|
||||
|
||||
|
||||
# Print one root section list output correctly
|
||||
# sys.setrecursionlimit(2000)
|
||||
def print_output(
|
||||
tree: list[Any],
|
||||
tree: list[str | list[str | list]],
|
||||
indent: str = " ",
|
||||
depth: int = 0,
|
||||
depth: int = -1,
|
||||
branches: int = 1,
|
||||
leaves: int = 0,
|
||||
) -> tuple[int, int]:
|
||||
@ -93,8 +50,11 @@ def print_output(
|
||||
branches, leaves = print_output(
|
||||
sub, indent, depth + 1, branches + 1, leaves
|
||||
)
|
||||
else:
|
||||
elif type(sub) == str:
|
||||
leaves += 1
|
||||
# Highlight special elements (in bold green for the moment)
|
||||
for elmnt in SPECIAL_OUTPUT:
|
||||
sub = elmnt.sub(esc(BOLD, GREEN) + r"\1" + esc(), sub)
|
||||
print(indent * depth + sub)
|
||||
return (branches, leaves)
|
||||
|
||||
@ -125,12 +85,58 @@ def main(*argv):
|
||||
root: Rubrique = RootRubrique()
|
||||
|
||||
# Write everything & print the output human-readably
|
||||
sections, articles = print_output(root.write_tree(CFG.output_dir))
|
||||
branches, leaves = print_output(root.write_tree(CFG.output_dir))
|
||||
# End, summary message
|
||||
print(f"Exported a total of {sections} sections, containing {articles} articles")
|
||||
print(
|
||||
f"""
|
||||
Exported a total of {leaves} Markdown files, stored into {branches} directories"""
|
||||
)
|
||||
|
||||
# print() # Break line between export & unknown characters warning
|
||||
# Warn about each article that contains unknown character(s)
|
||||
# TODO do it with Python warnings
|
||||
|
||||
DB.close() # Close the connection with the database
|
||||
|
||||
|
||||
r""" OLD CODE
|
||||
# Print the detected unknown chars in article in their context but highlighted
|
||||
def warn_unknown_chars(article: Article) -> None:
|
||||
# Print the title of the article in which there are unknown characters
|
||||
# & the number of them
|
||||
unknown_chars_apparitions: list[str] = unknown_chars_context(article.texte)
|
||||
nb: int = len(unknown_chars_apparitions)
|
||||
s: str = "s" if nb > 1 else ""
|
||||
style(f"{nb}")
|
||||
print(f" unknown character{s} in", end="")
|
||||
style(f" {article.lang} ")
|
||||
highlight(article.titre, *unknown_chars(article.titre))
|
||||
print() # Break line
|
||||
# Print the context in which the unknown characters are found
|
||||
for text in unknown_chars_apparitions:
|
||||
style(" … ")
|
||||
highlight(text, *unknown_chars(text))
|
||||
style(" … \n")
|
||||
print() # Break line
|
||||
|
||||
# Return a list of tuples giving the start and end of unknown substring in text
|
||||
def unknown_chars(text: str) -> list[tuple[int, int]]:
|
||||
positions: list[tuple[int, int]] = []
|
||||
for char in UNKNOWN_ISO:
|
||||
for match in finditer("(" + char + ")+", text):
|
||||
positions.append((match.start(), match.end()))
|
||||
return positions
|
||||
|
||||
# Return strings with unknown chars found in text, surrounded by context_length chars
|
||||
def unknown_chars_context(text: str, context_length: int = 24) -> list[str]:
|
||||
errors: list[str] = []
|
||||
context: str = r".{0," + str(context_length) + r"}"
|
||||
for char in UNKNOWN_ISO:
|
||||
matches = finditer(
|
||||
context + r"(?=" + char + r")" + char + context,
|
||||
text,
|
||||
)
|
||||
for match in matches:
|
||||
errors.append(match.group())
|
||||
return errors
|
||||
"""
|
||||
|
@ -256,31 +256,7 @@ UNKNOWN_ISO = (
|
||||
|
||||
# Special elements in terminal output to surround
|
||||
SPECIAL_OUTPUT = (
|
||||
(compile(r"^([0-9]+?\.)(?= )"), r"{}\1{}"), # Counter
|
||||
(compile(r"(?<= )->(?= )"), r"{}->{}"), # Arrow
|
||||
(compile(r"(?<=^Exporting )([0-9]+?)(?= )"), r"{}\1{}"), # Total
|
||||
compile(r"^([0-9]+?\.)(?= )"), # Counter
|
||||
compile(r"(?<= )(->)(?= )"), # Arrow
|
||||
compile(r"(?<=^Exporting )([0-9]+?)(?= )"), # Total
|
||||
)
|
||||
|
||||
|
||||
r"""
|
||||
# Return a list of tuples giving the start and end of unknown substring in text
|
||||
def unknown_chars(text: str) -> list[tuple[int, int]]:
|
||||
positions: list[tuple[int, int]] = []
|
||||
for char in UNKNOWN_ISO:
|
||||
for match in finditer("(" + char + ")+", text):
|
||||
positions.append((match.start(), match.end()))
|
||||
return positions
|
||||
|
||||
# Return strings with unknown chars found in text, surrounded by context_length chars
|
||||
def unknown_chars_context(text: str, context_length: int = 24) -> list[str]:
|
||||
errors: list[str] = []
|
||||
context: str = r".{0," + str(context_length) + r"}"
|
||||
for char in UNKNOWN_ISO:
|
||||
matches = finditer(
|
||||
context + r"(?=" + char + r")" + char + context,
|
||||
text,
|
||||
)
|
||||
for match in matches:
|
||||
errors.append(match.group())
|
||||
return errors
|
||||
"""
|
||||
|
@ -406,6 +406,7 @@ class RootRubrique(Rubrique):
|
||||
# 0 ID
|
||||
self.id_rubrique = 0
|
||||
# self.object_id = 0
|
||||
self.profondeur = 0
|
||||
|
||||
def write_tree(
|
||||
self, parent_dir: str, sections_limit: int = 0, articles_limit: int = 0
|
||||
@ -414,9 +415,11 @@ class RootRubrique(Rubrique):
|
||||
output: list[str | list[Any]] = []
|
||||
# Starting message
|
||||
output.append(
|
||||
f"Begin converting {CFG.db}@{CFG.db_host} db to plain Markdown+YAML files"
|
||||
f"""\
|
||||
Begin exporting `{CFG.db}@{CFG.db_host}` SPIP database to plain Markdown+YAML
|
||||
files into the directory `{parent_dir}`, as database user `{CFG.db_user}`
|
||||
"""
|
||||
)
|
||||
output.append(f" as db user {CFG.db_user}, into the directory {parent_dir}")
|
||||
# Get all child section of self
|
||||
child_sections = (
|
||||
Rubrique.select()
|
||||
|
Loading…
Reference in New Issue
Block a user