From ae7063e241af88d515eb33719bb3877828d174ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Guilhem=20Faur=C3=A9?= <pro@gfaure.eu>
Date: Mon, 22 May 2023 16:48:47 +0200
Subject: [PATCH] coloring big refactor + init copy of assets

---
 spip2md/converter.py |  15 +++---
 spip2md/items.py     |   1 +
 spip2md/main.py      | 122 ++++++++++++++++++++++++++++---------------
 3 files changed, 89 insertions(+), 49 deletions(-)

diff --git a/spip2md/converter.py b/spip2md/converter.py
index 7f5182a..440ccce 100644
--- a/spip2md/converter.py
+++ b/spip2md/converter.py
@@ -275,6 +275,7 @@ unknown_iso = (
 )
 
 
+# Apply spip_to_markdown conversions to a text
 def convert_body(text: Optional[str]) -> str:
     if text is None:
         return ""
@@ -285,6 +286,7 @@ def convert_body(text: Optional[str]) -> str:
     return text
 
 
+# Apply spip_to_text conversions to a text
 def convert_meta(text: Optional[str]) -> str:
     if text is None:
         return ""
@@ -295,22 +297,23 @@ def convert_meta(text: Optional[str]) -> str:
     return text
 
 
+# Replace unknown chars with empty strings (delete them)
 def remove_unknown_chars(text: str) -> str:
     for char in unknown_iso:
         text.replace(char, "")
     return text
 
 
-def highlight_unknown_chars(text: str, pre: str, post: str) -> str:
-    # Add pre before unknown char and post after unknown char
+# Return a list of (start, end) tuples, one for each unknown substring in text
+def unknown_chars(text: str) -> list[tuple[int, int]]:
+    positions: list[tuple[int, int]] = []
     for char in unknown_iso:
         for match in finditer("(" + char + ")+", text):
-            text = (
-                text[: match.start()] + pre + match.group() + post + text[match.end() :]
-            )
-    return text
+            positions.append((match.start(), match.end()))
+    return positions
 
 
+# Return strings with unknown chars found in text, surrounded by context_length chars
 def get_unknown_chars(text: str, context_length: int = 20) -> list[str]:
     errors: list[str] = []
     context: str = r".{0," + str(context_length) + r"}"
diff --git a/spip2md/items.py b/spip2md/items.py
index bc57de3..7b8f5a3 100644
--- a/spip2md/items.py
+++ b/spip2md/items.py
@@ -166,6 +166,7 @@ class Document:
         self.creation: str = document.date
         self.publication: str = document.date_publication
         self.update: str = document.maj
+        self.media: str = document.media
 
     def get_slug(self, date: bool = False) -> str:
         return slugify((self.publication + "-" if date else "") + self.title)
diff --git a/spip2md/main.py b/spip2md/main.py
index cfb9e5a..14a6476 100755
--- a/spip2md/main.py
+++ b/spip2md/main.py
@@ -1,26 +1,56 @@
 #!python
 # pyright: strict
 from os import makedirs, mkdir
-from shutil import rmtree
+from shutil import copyfile, rmtree
 from sys import argv
 
 from config import config
-from converter import get_unknown_chars, highlight_unknown_chars
+from converter import get_unknown_chars, unknown_chars
 from database import db
-from items import Article, Sections
+from items import Article, Documents, Sections
+
+
+# Print a stylized string, without trailing newline
+def style(string: str, *args: int) -> None:
+    esc = "\033["  # Terminal escape sequence, needs to be closed by "m"
+    if len(args) == 0:
+        params: str = "1;"  # Defaults to bold
+    else:
+        params: str = ""
+    for a in args:
+        params += str(a) + ";"
+    print(esc + params[:-1] + "m" + string + esc + "0m", end="")
+
+
+# Define styles
+BO = 1  # Bold
+IT = 3  # Italic
+UN = 4  # Underline
+# Define colors
+R = 91  # Red
+G = 92  # Green
+Y = 93  # Yellow
+B = 94  # Blue
+C0 = 95  # Color
+C1 = 96  # Color
+C2 = 96  # Color
+
+
+# Print a string, highlighting every substring from start_stop[x][0] to start_stop[x][1]
+def highlight(string: str, *start_stop: tuple[int, int]) -> None:
+    previous_stop = 0
+    for start, stop in start_stop:
+        print(string[previous_stop:start], end="")
+        style(string[start:stop], BO, R)
+        previous_stop = stop
+    print(string[previous_stop:], end="")
 
-# Define terminal escape sequences to stylize output
-R: str = "\033[91m"
-G: str = "\033[92m"
-B: str = "\033[94m"
-BOLD: str = "\033[1m"
-RESET: str = "\033[0m"
 
 # Connect to the MySQL database with Peewee ORM
 db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
 db.connect()
 
-if __name__ == "__main__":  # Following is executed only if script is directly executed
+if __name__ == "__main__":  # Only if script is directly executed
     # Define max nb of articles to export based on first CLI argument
     if len(argv) >= 2:
         toexport = int(argv[1])
@@ -41,22 +71,16 @@ if __name__ == "__main__":  # Following is executed only if script is directly e
             break
         articles = section.get_articles(toexport)
         # Print the name of the exported section & number of remaining sections
-        print(
-            f"{BOLD}{counter.count + 1}. {RESET}"
-            + highlight_unknown_chars(section.title, R, RESET),
-            end="",
-        )
+        style(f"{counter.count + 1}. ", BO)
+        highlight(section.title, *unknown_chars(section.title))
         if counter.remaining() > 2:
-            print(
-                f"   {BOLD}{B}{counter.remaining()-1}{RESET} {BOLD}sections left"
-                + RESET,
-                end="",
-            )
+            style(f"   {counter.remaining()-1}", BO, G)
+            style(" sections")
+            print(" left to export", end="")
         if toexport > 1:
-            print(
-                f"   {BOLD}Export limit is in {R}{toexport}{RESET} articles{RESET}",
-                end="",
-            )
+            style(f"   {toexport}", BO, Y)
+            style(" articles")
+            print(" left before export limit", end="")
         print()
         # Define the section’s path (directory) & create directory(ies) if needed
         sectiondir: str = config.output_dir + "/" + section.get_slug()
@@ -70,17 +94,18 @@ if __name__ == "__main__":  # Following is executed only if script is directly e
             # Print the remaining number of articles to export every 100 articles
             if counter.count % 100 == 0:
                 s: str = "s" if counter.remaining() > 1 else ""
-                print(
-                    f"  {BOLD}Exporting {G}{counter.remaining()}{RESET}"
-                    + f"{BOLD} SPIP article{s}{RESET} to Markdown & YAML files"
-                )
+                print("  Exporting", end="")
+                style(f" {counter.remaining()}", BO, Y)
+                print(" SPIP", end="")
+                style(f" article{s}")
+                print(" to Markdown & YAML files")
             # Print the title of the article being exported
-            print(
-                f"  {BOLD}{counter.count + 1}. "
+            style(
+                f"  {counter.count + 1}. "
                 + ("EMPTY " if len(article.text) < 1 else "")
-                + f"{article.lang} {RESET}"
-                + highlight_unknown_chars(article.title, R, RESET)
+                + f"{article.lang} "
             )
+            highlight(article.title, *unknown_chars(article.title))
             # Define the full article path & create directory(ies) if needed
             articledir: str = sectiondir + "/" + article.get_slug()
             makedirs(articledir, exist_ok=True)
@@ -91,10 +116,23 @@ if __name__ == "__main__":  # Following is executed only if script is directly e
             # Store articles with unknown characters
             if len(get_unknown_chars(article.text)) > 0:
                 unknown_chars_articles.append(article)
+            # Loop over article’s related files (images …)
+            for document, counter in Documents(article.id):
+                # Print the name of the file with a counter
+                style(f"    {counter.count + 1}. {document.media} ")
+                highlight(article.title, *unknown_chars(article.title))
+                # Copy the document from its SPIP location to the new location
+                copyfile(config.data_dir + "/" + document.file, document.get_slug())
+                # Print the output file’s path once the file has been copied
+                style("    -->", BO, B)
+                print(f" {articledir}/{document.get_slug()}")
             # Print the outputted file’s path when finished exporting the article
-            print(f"  {BOLD}{G}-->{RESET} {articlepath}")
+            style("  --> ", BO, Y)
+            print(articlepath)
         # Print the outputted file’s path when finished exporting the section
-        print(f"{BOLD}{B}-->{RESET} {sectionpath}\n")
+        style("  --> ", BO, G)
+        print(sectionpath)
+        print()
         # Decrement export limit with length of exported section
         toexport -= len(articles)
 
@@ -105,16 +143,14 @@ if __name__ == "__main__":  # Following is executed only if script is directly e
         unknown_chars_apparitions: list[str] = get_unknown_chars(article.text)
         nb: int = len(unknown_chars_apparitions)
         s: str = "s" if nb > 1 else ""
-        print(
-            f"\n{BOLD}{nb}{RESET} unknown character{s} in {BOLD}{article.lang}{RESET} "
-            + highlight_unknown_chars(article.title, R, RESET)
-        )
+        style(f"\n{nb}")
+        print(f" unknown character{s} in")
+        style(f" {article.lang} ")
+        highlight(article.title, *unknown_chars(article.title))
         # Print the context in which the unknown characters are found
         for text in unknown_chars_apparitions:
-            print(
-                f"  {BOLD}…{RESET} "
-                + highlight_unknown_chars(text, R, RESET)
-                + f" {BOLD}…{RESET}"
-            )
+            style("  … ")
+            highlight(text, *unknown_chars(text))
+            style("  … ")
 
     db.close()  # Close the connection with the database