cleaning, fixed indentation, styling

2023-05-26 17:20:30 +02:00 · 2023-05-26 17:20:30 +02:00 · cc549db945
commit cc549db945
parent 952595b34c
3 changed files with 69 additions and 84 deletions
--- a/spip2md/init.py
+++ b/spip2md/init.py
@ -3,12 +3,10 @@
 import sys
 from os import makedirs
 from shutil import rmtree
 from typing import Any
 from peewee import ModelSelect
 from spip2md.config import CFG
 from spip2md.database import DB
 from spip2md.regexmap import SPECIAL_OUTPUT
 from spip2md.spipobjects import RootRubrique, Rubrique
 # Define styles
@ -25,66 +23,25 @@ C1 = 96  # Color
 C2 = 96  # Color
-# Print a stylized string, without trailing newline
+# Terminal escape sequence
-def style(string: str, *args: int, end: str = "") -> None:
+def esc(*args: int) -> str:
    esc = "\033["  # Terminal escape sequence, needs to be closed by "m"
    if len(args) == 0:
-        params: str = "1;"  # Defaults to bold
+        params: str = "0;"  # Defaults to reset
    else:
        params: str = ""
    # Build a ";"-separated string from args; its trailing ";" is stripped below
    for a in args:
        params += str(a) + ";"
-    print(esc + params[:-1] + "m" + string + esc + "0m", end=end)
+    # Base terminal escape sequence that needs to be closed by "m"
-
+    return "\033[" + params[:-1] + "m"
 # Print a string, highlighting every substring starting at start_stop[x][0] and ending at start_stop[x][1]
 def highlight(string: str, *start_stop: tuple[int, int], end: str = "") -> None:
    previous_stop = 0
    for start, stop in start_stop:
        print(string[previous_stop:start], end="")
        style(string[start:stop], BOLD, RED)
        previous_stop = stop
    print(string[previous_stop:], end=end)
 # Query the DB to retrieve all sections without parent, sorted by publication date
 def root_sections(limit: int = 10**3) -> ModelSelect:
    return (
        Rubrique.select()
        .where(Rubrique.id_parent == 0)
        .order_by(Rubrique.date.desc())
        .limit(limit)
    )
 r"""
 # Print the detected unknown chars in article in their context but highlighted
 def warn_unknown_chars(article: Article) -> None:
    # Print the title of the article in which there are unknown characters
    # & the number of them
    unknown_chars_apparitions: list[str] = unknown_chars_context(article.texte)
    nb: int = len(unknown_chars_apparitions)
    s: str = "s" if nb > 1 else ""
    style(f"{nb}")
    print(f" unknown character{s} in", end="")
    style(f" {article.lang} ")
    highlight(article.titre, *unknown_chars(article.titre))
    print()  # Break line
    # Print the context in which the unknown characters are found
    for text in unknown_chars_apparitions:
        style("  … ")
        highlight(text, *unknown_chars(text))
        style(" … \n")
    print()  # Break line
 """
 # Correctly print the output list of one root section
 # sys.setrecursionlimit(2000)
 def print_output(
-    tree: list[Any],
+    tree: list[str | list[str | list]],
    indent: str = "  ",
-    depth: int = 0,
+    depth: int = -1,
    branches: int = 1,
    leaves: int = 0,
 ) -> tuple[int, int]:
@ -93,8 +50,11 @@ def print_output(
            branches, leaves = print_output(
                sub, indent, depth + 1, branches + 1, leaves
            )
-        else:
+        elif type(sub) == str:
            leaves += 1
            # Highlight special elements (in bold green for the moment)
            for elmnt in SPECIAL_OUTPUT:
                sub = elmnt.sub(esc(BOLD, GREEN) + r"\1" + esc(), sub)
            print(indent * depth + sub)
    return (branches, leaves)
@ -125,12 +85,58 @@ def main(*argv):
    root: Rubrique = RootRubrique()
    # Write everything & print the output human-readably
-    sections, articles = print_output(root.write_tree(CFG.output_dir))
+    branches, leaves = print_output(root.write_tree(CFG.output_dir))
    # End, summary message
-    print(f"Exported a total of {sections} sections, containing {articles} articles")
+    print(
        f"""
 Exported a total of {leaves} Markdown files, stored into {branches} directories"""
    )
    # print()  # Break line between export & unknown characters warning
    # Warn about each article that contains unknown character(s)
    # TODO do it with Python warnings
    DB.close()  # Close the connection with the database
 r""" OLD CODE
 # Print the detected unknown chars in article in their context but highlighted
 def warn_unknown_chars(article: Article) -> None:
    # Print the title of the article in which there are unknown characters
    # & the number of them
    unknown_chars_apparitions: list[str] = unknown_chars_context(article.texte)
    nb: int = len(unknown_chars_apparitions)
    s: str = "s" if nb > 1 else ""
    style(f"{nb}")
    print(f" unknown character{s} in", end="")
    style(f" {article.lang} ")
    highlight(article.titre, *unknown_chars(article.titre))
    print()  # Break line
    # Print the context in which the unknown characters are found
    for text in unknown_chars_apparitions:
        style("  … ")
        highlight(text, *unknown_chars(text))
        style(" … \n")
    print()  # Break line
 # Return a list of tuples giving the start and end of unknown substring in text
 def unknown_chars(text: str) -> list[tuple[int, int]]:
    positions: list[tuple[int, int]] = []
    for char in UNKNOWN_ISO:
        for match in finditer("(" + char + ")+", text):
            positions.append((match.start(), match.end()))
    return positions
 # Return strings with unknown chars found in text, surrounded by context_length chars
 def unknown_chars_context(text: str, context_length: int = 24) -> list[str]:
    errors: list[str] = []
    context: str = r".{0," + str(context_length) + r"}"
    for char in UNKNOWN_ISO:
        matches = finditer(
            context + r"(?=" + char + r")" + char + context,
            text,
        )
        for match in matches:
            errors.append(match.group())
    return errors
 """
--- a/spip2md/regexmap.py
+++ b/spip2md/regexmap.py
@ -256,31 +256,7 @@ UNKNOWN_ISO = (
 # Special elements in terminal output to surround
 SPECIAL_OUTPUT = (
-    (compile(r"^([0-9]+?\.)(?= )"), r"{}\1{}"),  # Counter
+    compile(r"^([0-9]+?\.)(?= )"),  # Counter
-    (compile(r"(?<= )->(?= )"), r"{}->{}"),  # Arrow
+    compile(r"(?<= )(->)(?= )"),  # Arrow
-    (compile(r"(?<=^Exporting )([0-9]+?)(?= )"), r"{}\1{}"),  # Total
+    compile(r"(?<=^Exporting )([0-9]+?)(?= )"),  # Total
 )
 r"""
 # Return a list of tuples giving the start and end of unknown substring in text
 def unknown_chars(text: str) -> list[tuple[int, int]]:
    positions: list[tuple[int, int]] = []
    for char in UNKNOWN_ISO:
        for match in finditer("(" + char + ")+", text):
            positions.append((match.start(), match.end()))
    return positions
 # Return strings with unknown chars found in text, surrounded by context_length chars
 def unknown_chars_context(text: str, context_length: int = 24) -> list[str]:
    errors: list[str] = []
    context: str = r".{0," + str(context_length) + r"}"
    for char in UNKNOWN_ISO:
        matches = finditer(
            context + r"(?=" + char + r")" + char + context,
            text,
        )
        for match in matches:
            errors.append(match.group())
    return errors
 """
--- a/spip2md/spipobjects.py
+++ b/spip2md/spipobjects.py
@ -406,6 +406,7 @@ class RootRubrique(Rubrique):
        # 0 ID
        self.id_rubrique = 0
        # self.object_id = 0
        self.profondeur = 0
    def write_tree(
        self, parent_dir: str, sections_limit: int = 0, articles_limit: int = 0
@ -414,9 +415,11 @@ class RootRubrique(Rubrique):
        output: list[str | list[Any]] = []
        # Starting message
        output.append(
-            f"Begin converting {CFG.db}@{CFG.db_host} db to plain Markdown+YAML files"
+            f"""\
 Begin exporting `{CFG.db}@{CFG.db_host}` SPIP database to plain Markdown+YAML
 files into the directory `{parent_dir}`, as database user `{CFG.db_user}`
 """
        )
        output.append(f" as db user {CFG.db_user}, into the directory {parent_dir}")
        # Get all child section of self
        child_sections = (
            Rubrique.select()