cleaning, fixed indentation, styling
This commit is contained in:
parent
952595b34c
commit
cc549db945
@ -3,12 +3,10 @@
|
|||||||
import sys
|
import sys
|
||||||
from os import makedirs
|
from os import makedirs
|
||||||
from shutil import rmtree
|
from shutil import rmtree
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from peewee import ModelSelect
|
|
||||||
|
|
||||||
from spip2md.config import CFG
|
from spip2md.config import CFG
|
||||||
from spip2md.database import DB
|
from spip2md.database import DB
|
||||||
|
from spip2md.regexmap import SPECIAL_OUTPUT
|
||||||
from spip2md.spipobjects import RootRubrique, Rubrique
|
from spip2md.spipobjects import RootRubrique, Rubrique
|
||||||
|
|
||||||
# Define styles
|
# Define styles
|
||||||
@ -25,66 +23,25 @@ C1 = 96 # Color
|
|||||||
C2 = 96 # Color
|
C2 = 96 # Color
|
||||||
|
|
||||||
|
|
||||||
# Print a stylized string, without trailing newline
|
# Terminal escape sequence
|
||||||
def style(string: str, *args: int, end: str = "") -> None:
|
def esc(*args: int) -> str:
|
||||||
esc = "\033[" # Terminal escape sequence, needs to be closed by "m"
|
|
||||||
if len(args) == 0:
|
if len(args) == 0:
|
||||||
params: str = "1;" # Defaults to bold
|
params: str = "0;" # Defaults to reset
|
||||||
else:
|
else:
|
||||||
params: str = ""
|
params: str = ""
|
||||||
|
# Build a string from args, that will be stripped from its trailing ;
|
||||||
for a in args:
|
for a in args:
|
||||||
params += str(a) + ";"
|
params += str(a) + ";"
|
||||||
print(esc + params[:-1] + "m" + string + esc + "0m", end=end)
|
# Base terminal escape sequence that needs to be closed by "m"
|
||||||
|
return "\033[" + params[:-1] + "m"
|
||||||
|
|
||||||
# Print a string, highlighting every substring starting at start_stop[x][0] …
|
|
||||||
def highlight(string: str, *start_stop: tuple[int, int], end: str = "") -> None:
|
|
||||||
previous_stop = 0
|
|
||||||
for start, stop in start_stop:
|
|
||||||
print(string[previous_stop:start], end="")
|
|
||||||
style(string[start:stop], BOLD, RED)
|
|
||||||
previous_stop = stop
|
|
||||||
print(string[previous_stop:], end=end)
|
|
||||||
|
|
||||||
|
|
||||||
# Query the DB to retrieve all sections without parent, sorted by publication date
|
|
||||||
def root_sections(limit: int = 10**3) -> ModelSelect:
|
|
||||||
return (
|
|
||||||
Rubrique.select()
|
|
||||||
.where(Rubrique.id_parent == 0)
|
|
||||||
.order_by(Rubrique.date.desc())
|
|
||||||
.limit(limit)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
r"""
|
|
||||||
# Print the detected unknown chars in article in their context but highlighted
|
|
||||||
def warn_unknown_chars(article: Article) -> None:
|
|
||||||
# Print the title of the article in which there are unknown characters
|
|
||||||
# & the number of them
|
|
||||||
unknown_chars_apparitions: list[str] = unknown_chars_context(article.texte)
|
|
||||||
nb: int = len(unknown_chars_apparitions)
|
|
||||||
s: str = "s" if nb > 1 else ""
|
|
||||||
style(f"{nb}")
|
|
||||||
print(f" unknown character{s} in", end="")
|
|
||||||
style(f" {article.lang} ")
|
|
||||||
highlight(article.titre, *unknown_chars(article.titre))
|
|
||||||
print() # Break line
|
|
||||||
# Print the context in which the unknown characters are found
|
|
||||||
for text in unknown_chars_apparitions:
|
|
||||||
style(" … ")
|
|
||||||
highlight(text, *unknown_chars(text))
|
|
||||||
style(" … \n")
|
|
||||||
print() # Break line
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
# Print one root section list output correctly
|
# Print one root section list output correctly
|
||||||
# sys.setrecursionlimit(2000)
|
# sys.setrecursionlimit(2000)
|
||||||
def print_output(
|
def print_output(
|
||||||
tree: list[Any],
|
tree: list[str | list[str | list]],
|
||||||
indent: str = " ",
|
indent: str = " ",
|
||||||
depth: int = 0,
|
depth: int = -1,
|
||||||
branches: int = 1,
|
branches: int = 1,
|
||||||
leaves: int = 0,
|
leaves: int = 0,
|
||||||
) -> tuple[int, int]:
|
) -> tuple[int, int]:
|
||||||
@ -93,8 +50,11 @@ def print_output(
|
|||||||
branches, leaves = print_output(
|
branches, leaves = print_output(
|
||||||
sub, indent, depth + 1, branches + 1, leaves
|
sub, indent, depth + 1, branches + 1, leaves
|
||||||
)
|
)
|
||||||
else:
|
elif type(sub) == str:
|
||||||
leaves += 1
|
leaves += 1
|
||||||
|
# Highlight special elements (in green for the moment)
|
||||||
|
for elmnt in SPECIAL_OUTPUT:
|
||||||
|
sub = elmnt.sub(esc(BOLD, GREEN) + r"\1" + esc(), sub)
|
||||||
print(indent * depth + sub)
|
print(indent * depth + sub)
|
||||||
return (branches, leaves)
|
return (branches, leaves)
|
||||||
|
|
||||||
@ -125,12 +85,58 @@ def main(*argv):
|
|||||||
root: Rubrique = RootRubrique()
|
root: Rubrique = RootRubrique()
|
||||||
|
|
||||||
# Write everything & print the output human-readably
|
# Write everything & print the output human-readably
|
||||||
sections, articles = print_output(root.write_tree(CFG.output_dir))
|
branches, leaves = print_output(root.write_tree(CFG.output_dir))
|
||||||
# End, summary message
|
# End, summary message
|
||||||
print(f"Exported a total of {sections} sections, containing {articles} articles")
|
print(
|
||||||
|
f"""
|
||||||
|
Exported a total of {leaves} Markdown files, stored into {branches} directories"""
|
||||||
|
)
|
||||||
|
|
||||||
# print() # Break line between export & unknown characters warning
|
# print() # Break line between export & unknown characters warning
|
||||||
# Warn about each article that contains unknown character(s)
|
# Warn about each article that contains unknown character(s)
|
||||||
# TODO do it with Python warnings
|
# TODO do it with Python warnings
|
||||||
|
|
||||||
DB.close() # Close the connection with the database
|
DB.close() # Close the connection with the database
|
||||||
|
|
||||||
|
|
||||||
|
r""" OLD CODE
|
||||||
|
# Print the detected unknown chars in article in their context but highlighted
|
||||||
|
def warn_unknown_chars(article: Article) -> None:
|
||||||
|
# Print the title of the article in which there are unknown characters
|
||||||
|
# & the number of them
|
||||||
|
unknown_chars_apparitions: list[str] = unknown_chars_context(article.texte)
|
||||||
|
nb: int = len(unknown_chars_apparitions)
|
||||||
|
s: str = "s" if nb > 1 else ""
|
||||||
|
style(f"{nb}")
|
||||||
|
print(f" unknown character{s} in", end="")
|
||||||
|
style(f" {article.lang} ")
|
||||||
|
highlight(article.titre, *unknown_chars(article.titre))
|
||||||
|
print() # Break line
|
||||||
|
# Print the context in which the unknown characters are found
|
||||||
|
for text in unknown_chars_apparitions:
|
||||||
|
style(" … ")
|
||||||
|
highlight(text, *unknown_chars(text))
|
||||||
|
style(" … \n")
|
||||||
|
print() # Break line
|
||||||
|
|
||||||
|
# Return a list of tuples giving the start and end of unknown substring in text
|
||||||
|
def unknown_chars(text: str) -> list[tuple[int, int]]:
|
||||||
|
positions: list[tuple[int, int]] = []
|
||||||
|
for char in UNKNOWN_ISO:
|
||||||
|
for match in finditer("(" + char + ")+", text):
|
||||||
|
positions.append((match.start(), match.end()))
|
||||||
|
return positions
|
||||||
|
|
||||||
|
# Return strings with unknown chars found in text, surrounded by context_length chars
|
||||||
|
def unknown_chars_context(text: str, context_length: int = 24) -> list[str]:
|
||||||
|
errors: list[str] = []
|
||||||
|
context: str = r".{0," + str(context_length) + r"}"
|
||||||
|
for char in UNKNOWN_ISO:
|
||||||
|
matches = finditer(
|
||||||
|
context + r"(?=" + char + r")" + char + context,
|
||||||
|
text,
|
||||||
|
)
|
||||||
|
for match in matches:
|
||||||
|
errors.append(match.group())
|
||||||
|
return errors
|
||||||
|
"""
|
||||||
|
@ -256,31 +256,7 @@ UNKNOWN_ISO = (
|
|||||||
|
|
||||||
# Special elements in terminal output to surround
|
# Special elements in terminal output to surround
|
||||||
SPECIAL_OUTPUT = (
|
SPECIAL_OUTPUT = (
|
||||||
(compile(r"^([0-9]+?\.)(?= )"), r"{}\1{}"), # Counter
|
compile(r"^([0-9]+?\.)(?= )"), # Counter
|
||||||
(compile(r"(?<= )->(?= )"), r"{}->{}"), # Arrow
|
compile(r"(?<= )(->)(?= )"), # Arrow
|
||||||
(compile(r"(?<=^Exporting )([0-9]+?)(?= )"), r"{}\1{}"), # Total
|
compile(r"(?<=^Exporting )([0-9]+?)(?= )"), # Total
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
r"""
|
|
||||||
# Return a list of tuples giving the start and end of unknown substring in text
|
|
||||||
def unknown_chars(text: str) -> list[tuple[int, int]]:
|
|
||||||
positions: list[tuple[int, int]] = []
|
|
||||||
for char in UNKNOWN_ISO:
|
|
||||||
for match in finditer("(" + char + ")+", text):
|
|
||||||
positions.append((match.start(), match.end()))
|
|
||||||
return positions
|
|
||||||
|
|
||||||
# Return strings with unknown chars found in text, surrounded by context_length chars
|
|
||||||
def unknown_chars_context(text: str, context_length: int = 24) -> list[str]:
|
|
||||||
errors: list[str] = []
|
|
||||||
context: str = r".{0," + str(context_length) + r"}"
|
|
||||||
for char in UNKNOWN_ISO:
|
|
||||||
matches = finditer(
|
|
||||||
context + r"(?=" + char + r")" + char + context,
|
|
||||||
text,
|
|
||||||
)
|
|
||||||
for match in matches:
|
|
||||||
errors.append(match.group())
|
|
||||||
return errors
|
|
||||||
"""
|
|
||||||
|
@ -406,6 +406,7 @@ class RootRubrique(Rubrique):
|
|||||||
# 0 ID
|
# 0 ID
|
||||||
self.id_rubrique = 0
|
self.id_rubrique = 0
|
||||||
# self.object_id = 0
|
# self.object_id = 0
|
||||||
|
self.profondeur = 0
|
||||||
|
|
||||||
def write_tree(
|
def write_tree(
|
||||||
self, parent_dir: str, sections_limit: int = 0, articles_limit: int = 0
|
self, parent_dir: str, sections_limit: int = 0, articles_limit: int = 0
|
||||||
@ -414,9 +415,11 @@ class RootRubrique(Rubrique):
|
|||||||
output: list[str | list[Any]] = []
|
output: list[str | list[Any]] = []
|
||||||
# Starting message
|
# Starting message
|
||||||
output.append(
|
output.append(
|
||||||
f"Begin converting {CFG.db}@{CFG.db_host} db to plain Markdown+YAML files"
|
f"""\
|
||||||
|
Begin exporting `{CFG.db}@{CFG.db_host}` SPIP database to plain Markdown+YAML
|
||||||
|
files into the directory `{parent_dir}`, as database user `{CFG.db_user}`
|
||||||
|
"""
|
||||||
)
|
)
|
||||||
output.append(f" as db user {CFG.db_user}, into the directory {parent_dir}")
|
|
||||||
# Get all child sections of self
|
# Get all child sections of self
|
||||||
child_sections = (
|
child_sections = (
|
||||||
Rubrique.select()
|
Rubrique.select()
|
||||||
|
Loading…
Reference in New Issue
Block a user