cleaning, fixed indentation, styling

This commit is contained in:
Guilhem Fauré 2023-05-26 17:20:30 +02:00
parent 952595b34c
commit cc549db945
3 changed files with 69 additions and 84 deletions

View File

@ -3,12 +3,10 @@
import sys import sys
from os import makedirs from os import makedirs
from shutil import rmtree from shutil import rmtree
from typing import Any
from peewee import ModelSelect
from spip2md.config import CFG from spip2md.config import CFG
from spip2md.database import DB from spip2md.database import DB
from spip2md.regexmap import SPECIAL_OUTPUT
from spip2md.spipobjects import RootRubrique, Rubrique from spip2md.spipobjects import RootRubrique, Rubrique
# Define styles # Define styles
@ -25,66 +23,25 @@ C1 = 96 # Color
C2 = 96 # Color C2 = 96 # Color
# Print a stylized string, without trailing newline # Terminal escape sequence
def style(string: str, *args: int, end: str = "") -> None: def esc(*args: int) -> str:
esc = "\033[" # Terminal escape sequence, needs to be closed by "m"
if len(args) == 0: if len(args) == 0:
params: str = "1;" # Defaults to bold params: str = "0;" # Defaults to reset
else: else:
params: str = "" params: str = ""
# Build a string from args, that will be stripped from its trailing ;
for a in args: for a in args:
params += str(a) + ";" params += str(a) + ";"
print(esc + params[:-1] + "m" + string + esc + "0m", end=end) # Base terminal escape sequence that needs to be closed by "m"
return "\033[" + params[:-1] + "m"
# Print a string, highlighting every substring starting at start_stop[x][0] …
def highlight(string: str, *start_stop: tuple[int, int], end: str = "") -> None:
previous_stop = 0
for start, stop in start_stop:
print(string[previous_stop:start], end="")
style(string[start:stop], BOLD, RED)
previous_stop = stop
print(string[previous_stop:], end=end)
# Query the DB to retrieve all sections without parent, sorted by publication date
def root_sections(limit: int = 10**3) -> ModelSelect:
return (
Rubrique.select()
.where(Rubrique.id_parent == 0)
.order_by(Rubrique.date.desc())
.limit(limit)
)
r"""
# Print the detected unknown chars in article in their context but highlighted
def warn_unknown_chars(article: Article) -> None:
# Print the title of the article in which there is unknown characters
# & the number of them
unknown_chars_apparitions: list[str] = unknown_chars_context(article.texte)
nb: int = len(unknown_chars_apparitions)
s: str = "s" if nb > 1 else ""
style(f"{nb}")
print(f" unknown character{s} in", end="")
style(f" {article.lang} ")
highlight(article.titre, *unknown_chars(article.titre))
print() # Break line
# Print the context in which the unknown characters are found
for text in unknown_chars_apparitions:
style("")
highlight(text, *unknown_chars(text))
style("\n")
print() # Break line
"""
# Print one root section list output correctly # Print one root section list output correctly
# sys.setrecursionlimit(2000) # sys.setrecursionlimit(2000)
def print_output( def print_output(
tree: list[Any], tree: list[str | list[str | list]],
indent: str = " ", indent: str = " ",
depth: int = 0, depth: int = -1,
branches: int = 1, branches: int = 1,
leaves: int = 0, leaves: int = 0,
) -> tuple[int, int]: ) -> tuple[int, int]:
@ -93,8 +50,11 @@ def print_output(
branches, leaves = print_output( branches, leaves = print_output(
sub, indent, depth + 1, branches + 1, leaves sub, indent, depth + 1, branches + 1, leaves
) )
else: elif type(sub) == str:
leaves += 1 leaves += 1
# Highlight special elements (in bold green for the moment)
for elmnt in SPECIAL_OUTPUT:
sub = elmnt.sub(esc(BOLD, GREEN) + r"\1" + esc(), sub)
print(indent * depth + sub) print(indent * depth + sub)
return (branches, leaves) return (branches, leaves)
@ -125,12 +85,58 @@ def main(*argv):
root: Rubrique = RootRubrique() root: Rubrique = RootRubrique()
# Write everything & print the output human-readably # Write everything & print the output human-readably
sections, articles = print_output(root.write_tree(CFG.output_dir)) branches, leaves = print_output(root.write_tree(CFG.output_dir))
# End, summary message # End, summary message
print(f"Exported a total of {sections} sections, containing {articles} articles") print(
f"""
Exported a total of {leaves} Markdown files, stored into {branches} directories"""
)
# print() # Break line between export & unknown characters warning # print() # Break line between export & unknown characters warning
# Warn about each article that contains unknown character(s) # Warn about each article that contains unknown character(s)
# TODO do it with Python warnings # TODO do it with Python warnings
DB.close() # Close the connection with the database DB.close() # Close the connection with the database
r""" OLD CODE
# Print the detected unknown chars in article in their context but highlighted
def warn_unknown_chars(article: Article) -> None:
# Print the title of the article in which there are unknown characters
# & the number of them
unknown_chars_apparitions: list[str] = unknown_chars_context(article.texte)
nb: int = len(unknown_chars_apparitions)
s: str = "s" if nb > 1 else ""
style(f"{nb}")
print(f" unknown character{s} in", end="")
style(f" {article.lang} ")
highlight(article.titre, *unknown_chars(article.titre))
print() # Break line
# Print the context in which the unknown characters are found
for text in unknown_chars_apparitions:
style("")
highlight(text, *unknown_chars(text))
style("\n")
print() # Break line
# Return a list of tuples giving the start and end of unknown substring in text
def unknown_chars(text: str) -> list[tuple[int, int]]:
positions: list[tuple[int, int]] = []
for char in UNKNOWN_ISO:
for match in finditer("(" + char + ")+", text):
positions.append((match.start(), match.end()))
return positions
# Return strings with unknown chars found in text, surrounded by context_length chars
def unknown_chars_context(text: str, context_length: int = 24) -> list[str]:
errors: list[str] = []
context: str = r".{0," + str(context_length) + r"}"
for char in UNKNOWN_ISO:
matches = finditer(
context + r"(?=" + char + r")" + char + context,
text,
)
for match in matches:
errors.append(match.group())
return errors
"""

View File

@ -256,31 +256,7 @@ UNKNOWN_ISO = (
# Special elements in terminal output to surround # Special elements in terminal output to surround
SPECIAL_OUTPUT = ( SPECIAL_OUTPUT = (
(compile(r"^([0-9]+?\.)(?= )"), r"{}\1{}"), # Counter compile(r"^([0-9]+?\.)(?= )"), # Counter
(compile(r"(?<= )->(?= )"), r"{}->{}"), # Arrow compile(r"(?<= )(->)(?= )"), # Arrow
(compile(r"(?<=^Exporting )([0-9]+?)(?= )"), r"{}\1{}"), # Total compile(r"(?<=^Exporting )([0-9]+?)(?= )"), # Total
) )
r"""
# Return a list of tuples giving the start and end of unknown substring in text
def unknown_chars(text: str) -> list[tuple[int, int]]:
positions: list[tuple[int, int]] = []
for char in UNKNOWN_ISO:
for match in finditer("(" + char + ")+", text):
positions.append((match.start(), match.end()))
return positions
# Return strings with unknown chards found in text, surrounded by context_length chars
def unknown_chars_context(text: str, context_length: int = 24) -> list[str]:
errors: list[str] = []
context: str = r".{0," + str(context_length) + r"}"
for char in UNKNOWN_ISO:
matches = finditer(
context + r"(?=" + char + r")" + char + context,
text,
)
for match in matches:
errors.append(match.group())
return errors
"""

View File

@ -406,6 +406,7 @@ class RootRubrique(Rubrique):
# 0 ID # 0 ID
self.id_rubrique = 0 self.id_rubrique = 0
# self.object_id = 0 # self.object_id = 0
self.profondeur = 0
def write_tree( def write_tree(
self, parent_dir: str, sections_limit: int = 0, articles_limit: int = 0 self, parent_dir: str, sections_limit: int = 0, articles_limit: int = 0
@ -414,9 +415,11 @@ class RootRubrique(Rubrique):
output: list[str | list[Any]] = [] output: list[str | list[Any]] = []
# Starting message # Starting message
output.append( output.append(
f"Begin converting {CFG.db}@{CFG.db_host} db to plain Markdown+YAML files" f"""\
Begin exporting `{CFG.db}@{CFG.db_host}` SPIP database to plain Markdown+YAML
files into the directory `{parent_dir}`, as database user `{CFG.db_user}`
"""
) )
output.append(f" as db user {CFG.db_user}, into the directory {parent_dir}")
# Get all child section of self # Get all child section of self
child_sections = ( child_sections = (
Rubrique.select() Rubrique.select()