cleaning, fixed indentation, styling

This commit is contained in:
Guilhem Fauré 2023-05-26 17:20:30 +02:00
parent 952595b34c
commit cc549db945
3 changed files with 69 additions and 84 deletions

View File

@ -3,12 +3,10 @@
import sys
from os import makedirs
from shutil import rmtree
from typing import Any
from peewee import ModelSelect
from spip2md.config import CFG
from spip2md.database import DB
from spip2md.regexmap import SPECIAL_OUTPUT
from spip2md.spipobjects import RootRubrique, Rubrique
# Define styles
@ -25,66 +23,25 @@ C1 = 96 # Color
C2 = 96 # Color
# Print a stylized string, without trailing newline
def style(string: str, *args: int, end: str = "") -> None:
esc = "\033[" # Terminal escape sequence, needs to be closed by "m"
# Terminal escape sequence
def esc(*args: int) -> str:
if len(args) == 0:
params: str = "1;" # Defaults to bold
params: str = "0;" # Defaults to reset
else:
params: str = ""
# Build a string from args; its trailing ; is stripped afterwards
for a in args:
params += str(a) + ";"
print(esc + params[:-1] + "m" + string + esc + "0m", end=end)
# Print a string, highlighting every substring starting at start_stop[x][0] …
def highlight(string: str, *start_stop: tuple[int, int], end: str = "") -> None:
    """Print string, passing each (start, stop) slice through style().

    The text between two consecutive spans is printed unchanged; every
    string[start:stop] slice is printed via style(..., BOLD, RED). Spans are
    consumed in the order given.

    Args:
        string: The text to print.
        *start_stop: (start, stop) index pairs delimiting the slices to style.
        end: Terminator appended after the final, unstyled remainder.
    """
    cursor = 0  # Index just past the last styled slice
    for span in start_stop:
        begin, finish = span
        # Plain text sitting between the previous span and this one
        print(string[cursor:begin], end="")
        style(string[begin:finish], BOLD, RED)
        cursor = finish
    # Whatever remains after the last span, followed by the requested terminator
    print(string[cursor:], end=end)
# Query the DB to retrieve all sections without parent, sorted by publication date
def root_sections(limit: int = 10**3) -> ModelSelect:
    """Build the query selecting sections that have no parent.

    Args:
        limit: Maximum number of rows the query may return.

    Returns:
        A select query on Rubrique filtered on id_parent == 0, ordered by
        date in descending order and capped at limit rows.
    """
    parentless = Rubrique.select().where(Rubrique.id_parent == 0)
    newest_first = parentless.order_by(Rubrique.date.desc())
    return newest_first.limit(limit)
r"""
# Print the detected unknown chars in article in their context but highlighted
def warn_unknown_chars(article: Article) -> None:
# Print the title of the article in which there are unknown characters
# & the number of them
unknown_chars_apparitions: list[str] = unknown_chars_context(article.texte)
nb: int = len(unknown_chars_apparitions)
s: str = "s" if nb > 1 else ""
style(f"{nb}")
print(f" unknown character{s} in", end="")
style(f" {article.lang} ")
highlight(article.titre, *unknown_chars(article.titre))
print() # Break line
# Print the context in which the unknown characters are found
for text in unknown_chars_apparitions:
style("")
highlight(text, *unknown_chars(text))
style("\n")
print() # Break line
"""
# Base terminal escape sequence that needs to be closed by "m"
return "\033[" + params[:-1] + "m"
# Print one root section list output correctly
# sys.setrecursionlimit(2000)
def print_output(
tree: list[Any],
tree: list[str | list[str | list]],
indent: str = " ",
depth: int = 0,
depth: int = -1,
branches: int = 1,
leaves: int = 0,
) -> tuple[int, int]:
@ -93,8 +50,11 @@ def print_output(
branches, leaves = print_output(
sub, indent, depth + 1, branches + 1, leaves
)
else:
elif type(sub) == str:
leaves += 1
# Highlight special elements (in bold green for the moment)
for elmnt in SPECIAL_OUTPUT:
sub = elmnt.sub(esc(BOLD, GREEN) + r"\1" + esc(), sub)
print(indent * depth + sub)
return (branches, leaves)
@ -125,12 +85,58 @@ def main(*argv):
root: Rubrique = RootRubrique()
# Write everything & print the output human-readably
sections, articles = print_output(root.write_tree(CFG.output_dir))
branches, leaves = print_output(root.write_tree(CFG.output_dir))
# End, summary message
print(f"Exported a total of {sections} sections, containing {articles} articles")
print(
f"""
Exported a total of {leaves} Markdown files, stored into {branches} directories"""
)
# print() # Break line between export & unknown characters warning
# Warn about each article that contains unknown character(s)
# TODO do it with Python warnings
DB.close() # Close the connection with the database
r""" OLD CODE
# Print the detected unknown chars in article in their context but highlighted
def warn_unknown_chars(article: Article) -> None:
# Print the title of the article in which there are unknown characters
# & the number of them
unknown_chars_apparitions: list[str] = unknown_chars_context(article.texte)
nb: int = len(unknown_chars_apparitions)
s: str = "s" if nb > 1 else ""
style(f"{nb}")
print(f" unknown character{s} in", end="")
style(f" {article.lang} ")
highlight(article.titre, *unknown_chars(article.titre))
print() # Break line
# Print the context in which the unknown characters are found
for text in unknown_chars_apparitions:
style("")
highlight(text, *unknown_chars(text))
style("\n")
print() # Break line
# Return a list of tuples giving the start and end of unknown substring in text
def unknown_chars(text: str) -> list[tuple[int, int]]:
positions: list[tuple[int, int]] = []
for char in UNKNOWN_ISO:
for match in finditer("(" + char + ")+", text):
positions.append((match.start(), match.end()))
return positions
# Return strings with unknown chars found in text, surrounded by context_length chars
def unknown_chars_context(text: str, context_length: int = 24) -> list[str]:
errors: list[str] = []
context: str = r".{0," + str(context_length) + r"}"
for char in UNKNOWN_ISO:
matches = finditer(
context + r"(?=" + char + r")" + char + context,
text,
)
for match in matches:
errors.append(match.group())
return errors
"""

View File

@ -256,31 +256,7 @@ UNKNOWN_ISO = (
# Special elements in terminal output to surround
SPECIAL_OUTPUT = (
(compile(r"^([0-9]+?\.)(?= )"), r"{}\1{}"), # Counter
(compile(r"(?<= )->(?= )"), r"{}->{}"), # Arrow
(compile(r"(?<=^Exporting )([0-9]+?)(?= )"), r"{}\1{}"), # Total
compile(r"^([0-9]+?\.)(?= )"), # Counter
compile(r"(?<= )(->)(?= )"), # Arrow
compile(r"(?<=^Exporting )([0-9]+?)(?= )"), # Total
)
r"""
# Return a list of tuples giving the start and end of unknown substring in text
def unknown_chars(text: str) -> list[tuple[int, int]]:
positions: list[tuple[int, int]] = []
for char in UNKNOWN_ISO:
for match in finditer("(" + char + ")+", text):
positions.append((match.start(), match.end()))
return positions
# Return strings with unknown chars found in text, surrounded by context_length chars
def unknown_chars_context(text: str, context_length: int = 24) -> list[str]:
errors: list[str] = []
context: str = r".{0," + str(context_length) + r"}"
for char in UNKNOWN_ISO:
matches = finditer(
context + r"(?=" + char + r")" + char + context,
text,
)
for match in matches:
errors.append(match.group())
return errors
"""

View File

@ -406,6 +406,7 @@ class RootRubrique(Rubrique):
# 0 ID
self.id_rubrique = 0
# self.object_id = 0
self.profondeur = 0
def write_tree(
self, parent_dir: str, sections_limit: int = 0, articles_limit: int = 0
@ -414,9 +415,11 @@ class RootRubrique(Rubrique):
output: list[str | list[Any]] = []
# Starting message
output.append(
f"Begin converting {CFG.db}@{CFG.db_host} db to plain Markdown+YAML files"
f"""\
Begin exporting `{CFG.db}@{CFG.db_host}` SPIP database to plain Markdown+YAML
files into the directory `{parent_dir}`, as database user `{CFG.db_user}`
"""
)
output.append(f" as db user {CFG.db_user}, into the directory {parent_dir}")
# Get all child section of self
child_sections = (
Rubrique.select()