coloring big refactor + init copy of assets

This commit is contained in:
Guilhem Fauré 2023-05-22 16:48:47 +02:00
parent 95ccc5fab5
commit ae7063e241
3 changed files with 89 additions and 49 deletions

View File

@ -275,6 +275,7 @@ unknown_iso = (
) )
# Apply spip_to_markdown conversions to a text
def convert_body(text: Optional[str]) -> str: def convert_body(text: Optional[str]) -> str:
if text is None: if text is None:
return "" return ""
@ -285,6 +286,7 @@ def convert_body(text: Optional[str]) -> str:
return text return text
# Apply spip_to_text conversions to a text
def convert_meta(text: Optional[str]) -> str: def convert_meta(text: Optional[str]) -> str:
if text is None: if text is None:
return "" return ""
@ -295,22 +297,23 @@ def convert_meta(text: Optional[str]) -> str:
return text return text
# Replace unknown chars with empty strings (delete them)
def remove_unknown_chars(text: str) -> str:
    """Return text with every entry of unknown_iso removed.

    Each entry of the module-level unknown_iso collection is matched
    literally (str.replace) and replaced with an empty string.
    """
    for char in unknown_iso:
        # str.replace returns a new string; it must be re-assigned,
        # otherwise the removal is silently lost
        text = text.replace(char, "")
    return text
# Return a list of tuples giving the start and end of unknown substring in text
def unknown_chars(text: str) -> list[tuple[int, int]]:
    """Locate every run of unknown characters in text.

    Each entry of the module-level unknown_iso collection is inserted as-is
    into the pattern "(<entry>)+", so one or more consecutive repetitions of
    an entry form a single match. Entries are not escaped before being used
    as a regular expression fragment.

    Returns the (start, end) index pairs of all matches, sorted by position
    in text so that callers can walk them from left to right.
    """
    positions: list[tuple[int, int]] = []
    for char in unknown_iso:
        positions.extend(
            (match.start(), match.end())
            for match in finditer("(" + char + ")+", text)
        )
    # Matches are collected entry by entry; sort them into text order
    return sorted(positions)
# Return strings with unknown chars found in text, surrounded by context_length chars
def get_unknown_chars(text: str, context_length: int = 20) -> list[str]: def get_unknown_chars(text: str, context_length: int = 20) -> list[str]:
errors: list[str] = [] errors: list[str] = []
context: str = r".{0," + str(context_length) + r"}" context: str = r".{0," + str(context_length) + r"}"

View File

@ -166,6 +166,7 @@ class Document:
self.creation: str = document.date self.creation: str = document.date
self.publication: str = document.date_publication self.publication: str = document.date_publication
self.update: str = document.maj self.update: str = document.maj
self.media: str = document.media
def get_slug(self, date: bool = False) -> str: def get_slug(self, date: bool = False) -> str:
return slugify((self.publication + "-" if date else "") + self.title) return slugify((self.publication + "-" if date else "") + self.title)

View File

@ -1,26 +1,56 @@
#!python #!python
# pyright: strict # pyright: strict
from os import makedirs, mkdir from os import makedirs, mkdir
from shutil import rmtree from shutil import copyfile, rmtree
from sys import argv from sys import argv
from config import config from config import config
from converter import get_unknown_chars, highlight_unknown_chars from converter import get_unknown_chars, unknown_chars
from database import db from database import db
from items import Article, Sections from items import Article, Documents, Sections
# Print a stylized string, without trailing newline
def style(string: str, *args: int) -> None:
    """Print string wrapped in a terminal styling sequence, without newline.

    args are numeric style/color codes; they are joined with ";" between the
    escape introducer and the closing "m". When no code is given, the string
    is printed in bold (code 1). A reset sequence (code 0) is always emitted
    after the string.
    """
    esc = "\033["  # Terminal escape sequence, needs to be closed by "m"
    # Defaults to bold when no code is passed
    params = ";".join(str(code) for code in args) or "1"
    print(esc + params + "m" + string + esc + "0m", end="")
# Define styles: numeric codes meant to be passed as extra arguments to style()
BO = 1 # Bold
IT = 3 # Italic
UN = 4 # Underline
# Define colors: numeric codes in the 9x range, also passed to style()
R = 91 # Red
G = 92 # Green
Y = 93 # Yellow
B = 94 # Blue
C0 = 95 # Color (presumably bright magenta in the usual ANSI table; confirm)
C1 = 96 # Color (presumably bright cyan in the usual ANSI table; confirm)
C2 = 96 # Color NOTE(review): same value as C1, possibly meant to be a distinct code (e.g. 97); confirm
# Print a string, highlighting every substring starting at start_stop[x][0] …
def highlight(string: str, *start_stop: tuple[int, int]) -> None:
    """Print string without newline, emphasizing the given spans.

    Each (start, stop) pair in start_stop delimits a substring that is
    printed through style() with the BO and R codes; the text between spans
    is printed unchanged.

    Spans may be given in any order and may overlap: they are processed in
    text order, and the part of a span already covered by a previous one is
    not printed a second time.
    """
    previous_stop = 0
    # Spans can arrive grouped by character rather than by position
    for start, stop in sorted(start_stop):
        start = max(start, previous_stop)  # Trim overlap with previous span
        if start >= stop:
            continue  # Empty span, or already fully highlighted
        print(string[previous_stop:start], end="")
        style(string[start:stop], BO, R)
        previous_stop = stop
    print(string[previous_stop:], end="")
# Define terminal escape sequences to stylize output
R: str = "\033[91m"
G: str = "\033[92m"
B: str = "\033[94m"
BOLD: str = "\033[1m"
RESET: str = "\033[0m"
# Connect to the MySQL database with Peewee ORM # Connect to the MySQL database with Peewee ORM
db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass) db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
db.connect() db.connect()
if __name__ == "__main__": # Following is executed only if script is directly executed if __name__ == "__main__": # Only if script is directly executed
# Define max nb of articles to export based on first CLI argument # Define max nb of articles to export based on first CLI argument
if len(argv) >= 2: if len(argv) >= 2:
toexport = int(argv[1]) toexport = int(argv[1])
@ -41,22 +71,16 @@ if __name__ == "__main__": # Following is executed only if script is directly e
break break
articles = section.get_articles(toexport) articles = section.get_articles(toexport)
# Print the name of the exported section & number of remaining sections # Print the name of the exported section & number of remaining sections
print( style(f"{counter.count + 1}. ", BO)
f"{BOLD}{counter.count + 1}. {RESET}" highlight(section.title, *unknown_chars(section.title))
+ highlight_unknown_chars(section.title, R, RESET),
end="",
)
if counter.remaining() > 2: if counter.remaining() > 2:
print( style(f" {counter.remaining()-1}", BO, G)
f" {BOLD}{B}{counter.remaining()-1}{RESET} {BOLD}sections left" style(" sections")
+ RESET, print(" left to export", end="")
end="",
)
if toexport > 1: if toexport > 1:
print( style(f" {toexport}", BO, Y)
f" {BOLD}Export limit is in {R}{toexport}{RESET} articles{RESET}", style(" articles")
end="", print(" left before export limit", end="")
)
print() print()
# Define the sections path (directory) & create directory(ies) if needed # Define the sections path (directory) & create directory(ies) if needed
sectiondir: str = config.output_dir + "/" + section.get_slug() sectiondir: str = config.output_dir + "/" + section.get_slug()
@ -70,17 +94,18 @@ if __name__ == "__main__": # Following is executed only if script is directly e
# Print the remaining number of articles to export every 100 articles # Print the remaining number of articles to export every 100 articles
if counter.count % 100 == 0: if counter.count % 100 == 0:
s: str = "s" if counter.remaining() > 1 else "" s: str = "s" if counter.remaining() > 1 else ""
print( print(" Exporting", end="")
f" {BOLD}Exporting {G}{counter.remaining()}{RESET}" style(f" {counter.remaining()}", BO, Y)
+ f"{BOLD} SPIP article{s}{RESET} to Markdown & YAML files" print(" SPIP", end="")
) style(f" article{s}")
print(" to Markdown & YAML files")
# Print the title of the article being exported # Print the title of the article being exported
print( style(
f" {BOLD}{counter.count + 1}. " f" {counter.count + 1}. "
+ ("EMPTY " if len(article.text) < 1 else "") + ("EMPTY " if len(article.text) < 1 else "")
+ f"{article.lang} {RESET}" + f"{article.lang} "
+ highlight_unknown_chars(article.title, R, RESET)
) )
highlight(article.title, *unknown_chars(article.title))
# Define the full article path & create directory(ies) if needed # Define the full article path & create directory(ies) if needed
articledir: str = sectiondir + "/" + article.get_slug() articledir: str = sectiondir + "/" + article.get_slug()
makedirs(articledir, exist_ok=True) makedirs(articledir, exist_ok=True)
@ -91,10 +116,23 @@ if __name__ == "__main__": # Following is executed only if script is directly e
# Store articles with unknown characters # Store articles with unknown characters
if len(get_unknown_chars(article.text)) > 0: if len(get_unknown_chars(article.text)) > 0:
unknown_chars_articles.append(article) unknown_chars_articles.append(article)
# Loop over articles related files (images …)
for document, counter in Documents(article.id):
# Print the name of the file with a counter
style(f" {counter.count + 1}. {document.media} ")
highlight(article.title, *unknown_chars(article.title))
# Copy the document from its SPIP location to the new location
copyfile(config.data_dir + "/" + document.file, document.get_slug())
# Print the outputted files path when copied the file
style(" -->", BO, B)
print(f" {articledir}/{document.get_slug()}")
# Print the outputted files path when finished exporting the article # Print the outputted files path when finished exporting the article
print(f" {BOLD}{G}-->{RESET} {articlepath}") style(" --> ", BO, Y)
print(articlepath)
# Print the outputted files path when finished exporting the section # Print the outputted files path when finished exporting the section
print(f"{BOLD}{B}-->{RESET} {sectionpath}\n") style(" --> ", BO, G)
print(sectionpath)
print()
# Decrement export limit with length of exported section # Decrement export limit with length of exported section
toexport -= len(articles) toexport -= len(articles)
@ -105,16 +143,14 @@ if __name__ == "__main__": # Following is executed only if script is directly e
unknown_chars_apparitions: list[str] = get_unknown_chars(article.text) unknown_chars_apparitions: list[str] = get_unknown_chars(article.text)
nb: int = len(unknown_chars_apparitions) nb: int = len(unknown_chars_apparitions)
s: str = "s" if nb > 1 else "" s: str = "s" if nb > 1 else ""
print( style(f"\n{nb}")
f"\n{BOLD}{nb}{RESET} unknown character{s} in {BOLD}{article.lang}{RESET} " print(f" unknown character{s} in")
+ highlight_unknown_chars(article.title, R, RESET) style(f" {article.lang} ")
) highlight(article.title, *unknown_chars(article.title))
# Print the context in which the unknown characters are found # Print the context in which the unknown characters are found
for text in unknown_chars_apparitions: for text in unknown_chars_apparitions:
print( style("")
f" {BOLD}{RESET} " highlight(text, *unknown_chars(text))
+ highlight_unknown_chars(text, R, RESET) style("")
+ f" {BOLD}{RESET}"
)
db.close() # Close the connection with the database db.close() # Close the connection with the database