coloring big refactor + init copy of assets
This commit is contained in:
parent
95ccc5fab5
commit
ae7063e241
@ -275,6 +275,7 @@ unknown_iso = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Apply spip_to_markdown conversions to a text
|
||||||
def convert_body(text: Optional[str]) -> str:
|
def convert_body(text: Optional[str]) -> str:
|
||||||
if text is None:
|
if text is None:
|
||||||
return ""
|
return ""
|
||||||
@ -285,6 +286,7 @@ def convert_body(text: Optional[str]) -> str:
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
# Apply spip_to_text conversions to a text
|
||||||
def convert_meta(text: Optional[str]) -> str:
|
def convert_meta(text: Optional[str]) -> str:
|
||||||
if text is None:
|
if text is None:
|
||||||
return ""
|
return ""
|
||||||
@ -295,22 +297,23 @@ def convert_meta(text: Optional[str]) -> str:
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
# Replace unknown chars with empty strings (delete them)
|
||||||
def remove_unknown_chars(text: str) -> str:
|
def remove_unknown_chars(text: str) -> str:
|
||||||
for char in unknown_iso:
|
for char in unknown_iso:
|
||||||
text.replace(char, "")
|
text.replace(char, "")
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def highlight_unknown_chars(text: str, pre: str, post: str) -> str:
|
# Return a list of tuples giving the start and end of each unknown substring in text
|
||||||
# Add pre before unknown char and post after unknown char
|
def unknown_chars(text: str) -> list[tuple[int, int]]:
|
||||||
|
positions: list[tuple[int, int]] = []
|
||||||
for char in unknown_iso:
|
for char in unknown_iso:
|
||||||
for match in finditer("(" + char + ")+", text):
|
for match in finditer("(" + char + ")+", text):
|
||||||
text = (
|
positions.append((match.start(), match.end()))
|
||||||
text[: match.start()] + pre + match.group() + post + text[match.end() :]
|
return positions
|
||||||
)
|
|
||||||
return text
|
|
||||||
|
|
||||||
|
|
||||||
|
# Return strings with unknown chars found in text, surrounded by context_length chars
|
||||||
def get_unknown_chars(text: str, context_length: int = 20) -> list[str]:
|
def get_unknown_chars(text: str, context_length: int = 20) -> list[str]:
|
||||||
errors: list[str] = []
|
errors: list[str] = []
|
||||||
context: str = r".{0," + str(context_length) + r"}"
|
context: str = r".{0," + str(context_length) + r"}"
|
||||||
|
@ -166,6 +166,7 @@ class Document:
|
|||||||
self.creation: str = document.date
|
self.creation: str = document.date
|
||||||
self.publication: str = document.date_publication
|
self.publication: str = document.date_publication
|
||||||
self.update: str = document.maj
|
self.update: str = document.maj
|
||||||
|
self.media: str = document.media
|
||||||
|
|
||||||
def get_slug(self, date: bool = False) -> str:
|
def get_slug(self, date: bool = False) -> str:
|
||||||
return slugify((self.publication + "-" if date else "") + self.title)
|
return slugify((self.publication + "-" if date else "") + self.title)
|
||||||
|
122
spip2md/main.py
122
spip2md/main.py
@ -1,26 +1,56 @@
|
|||||||
#!python
|
#!python
|
||||||
# pyright: strict
|
# pyright: strict
|
||||||
from os import makedirs, mkdir
|
from os import makedirs, mkdir
|
||||||
from shutil import rmtree
|
from shutil import copyfile, rmtree
|
||||||
from sys import argv
|
from sys import argv
|
||||||
|
|
||||||
from config import config
|
from config import config
|
||||||
from converter import get_unknown_chars, highlight_unknown_chars
|
from converter import get_unknown_chars, unknown_chars
|
||||||
from database import db
|
from database import db
|
||||||
from items import Article, Sections
|
from items import Article, Documents, Sections
|
||||||
|
|
||||||
|
|
||||||
|
# Print a stylized string, without trailing newline
|
||||||
|
def style(string: str, *args: int) -> None:
|
||||||
|
esc = "\033[" # Terminal escape sequence, needs to be closed by "m"
|
||||||
|
if len(args) == 0:
|
||||||
|
params: str = "1;" # Defaults to bold
|
||||||
|
else:
|
||||||
|
params: str = ""
|
||||||
|
for a in args:
|
||||||
|
params += str(a) + ";"
|
||||||
|
print(esc + params[:-1] + "m" + string + esc + "0m", end="")
|
||||||
|
|
||||||
|
|
||||||
|
# Define styles
|
||||||
|
BO = 1 # Bold
|
||||||
|
IT = 3 # Italic
|
||||||
|
UN = 4 # Underline
|
||||||
|
# Define colors
|
||||||
|
R = 91 # Red
|
||||||
|
G = 92 # Green
|
||||||
|
Y = 93 # Yellow
|
||||||
|
B = 94 # Blue
|
||||||
|
C0 = 95 # Color
|
||||||
|
C1 = 96 # Color
|
||||||
|
C2 = 96 # Color
|
||||||
|
|
||||||
|
|
||||||
|
# Print a string, highlighting every substring starting at start_stop[x][0] and ending at start_stop[x][1]
|
||||||
|
def highlight(string: str, *start_stop: tuple[int, int]) -> None:
|
||||||
|
previous_stop = 0
|
||||||
|
for start, stop in start_stop:
|
||||||
|
print(string[previous_stop:start], end="")
|
||||||
|
style(string[start:stop], BO, R)
|
||||||
|
previous_stop = stop
|
||||||
|
print(string[previous_stop:], end="")
|
||||||
|
|
||||||
# Define terminal escape sequences to stylize output
|
|
||||||
R: str = "\033[91m"
|
|
||||||
G: str = "\033[92m"
|
|
||||||
B: str = "\033[94m"
|
|
||||||
BOLD: str = "\033[1m"
|
|
||||||
RESET: str = "\033[0m"
|
|
||||||
|
|
||||||
# Connect to the MySQL database with Peewee ORM
|
# Connect to the MySQL database with Peewee ORM
|
||||||
db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
|
db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
|
||||||
db.connect()
|
db.connect()
|
||||||
|
|
||||||
if __name__ == "__main__": # Following is executed only if script is directly executed
|
if __name__ == "__main__": # Only if script is directly executed
|
||||||
# Define max nb of articles to export based on first CLI argument
|
# Define max nb of articles to export based on first CLI argument
|
||||||
if len(argv) >= 2:
|
if len(argv) >= 2:
|
||||||
toexport = int(argv[1])
|
toexport = int(argv[1])
|
||||||
@ -41,22 +71,16 @@ if __name__ == "__main__": # Following is executed only if script is directly e
|
|||||||
break
|
break
|
||||||
articles = section.get_articles(toexport)
|
articles = section.get_articles(toexport)
|
||||||
# Print the name of the exported section & number of remaining sections
|
# Print the name of the exported section & number of remaining sections
|
||||||
print(
|
style(f"{counter.count + 1}. ", BO)
|
||||||
f"{BOLD}{counter.count + 1}. {RESET}"
|
highlight(section.title, *unknown_chars(section.title))
|
||||||
+ highlight_unknown_chars(section.title, R, RESET),
|
|
||||||
end="",
|
|
||||||
)
|
|
||||||
if counter.remaining() > 2:
|
if counter.remaining() > 2:
|
||||||
print(
|
style(f" {counter.remaining()-1}", BO, G)
|
||||||
f" {BOLD}{B}{counter.remaining()-1}{RESET} {BOLD}sections left"
|
style(" sections")
|
||||||
+ RESET,
|
print(" left to export", end="")
|
||||||
end="",
|
|
||||||
)
|
|
||||||
if toexport > 1:
|
if toexport > 1:
|
||||||
print(
|
style(f" {toexport}", BO, Y)
|
||||||
f" {BOLD}Export limit is in {R}{toexport}{RESET} articles{RESET}",
|
style(" articles")
|
||||||
end="",
|
print(" left before export limit", end="")
|
||||||
)
|
|
||||||
print()
|
print()
|
||||||
# Define the section’s path (directory) & create directory(ies) if needed
|
# Define the section’s path (directory) & create directory(ies) if needed
|
||||||
sectiondir: str = config.output_dir + "/" + section.get_slug()
|
sectiondir: str = config.output_dir + "/" + section.get_slug()
|
||||||
@ -70,17 +94,18 @@ if __name__ == "__main__": # Following is executed only if script is directly e
|
|||||||
# Print the remaining number of articles to export every 100 articles
|
# Print the remaining number of articles to export every 100 articles
|
||||||
if counter.count % 100 == 0:
|
if counter.count % 100 == 0:
|
||||||
s: str = "s" if counter.remaining() > 1 else ""
|
s: str = "s" if counter.remaining() > 1 else ""
|
||||||
print(
|
print(" Exporting", end="")
|
||||||
f" {BOLD}Exporting {G}{counter.remaining()}{RESET}"
|
style(f" {counter.remaining()}", BO, Y)
|
||||||
+ f"{BOLD} SPIP article{s}{RESET} to Markdown & YAML files"
|
print(" SPIP", end="")
|
||||||
)
|
style(f" article{s}")
|
||||||
|
print(" to Markdown & YAML files")
|
||||||
# Print the title of the article being exported
|
# Print the title of the article being exported
|
||||||
print(
|
style(
|
||||||
f" {BOLD}{counter.count + 1}. "
|
f" {counter.count + 1}. "
|
||||||
+ ("EMPTY " if len(article.text) < 1 else "")
|
+ ("EMPTY " if len(article.text) < 1 else "")
|
||||||
+ f"{article.lang} {RESET}"
|
+ f"{article.lang} "
|
||||||
+ highlight_unknown_chars(article.title, R, RESET)
|
|
||||||
)
|
)
|
||||||
|
highlight(article.title, *unknown_chars(article.title))
|
||||||
# Define the full article path & create directory(ies) if needed
|
# Define the full article path & create directory(ies) if needed
|
||||||
articledir: str = sectiondir + "/" + article.get_slug()
|
articledir: str = sectiondir + "/" + article.get_slug()
|
||||||
makedirs(articledir, exist_ok=True)
|
makedirs(articledir, exist_ok=True)
|
||||||
@ -91,10 +116,23 @@ if __name__ == "__main__": # Following is executed only if script is directly e
|
|||||||
# Store articles with unknown characters
|
# Store articles with unknown characters
|
||||||
if len(get_unknown_chars(article.text)) > 0:
|
if len(get_unknown_chars(article.text)) > 0:
|
||||||
unknown_chars_articles.append(article)
|
unknown_chars_articles.append(article)
|
||||||
|
# Loop over article’s related files (images …)
|
||||||
|
for document, counter in Documents(article.id):
|
||||||
|
# Print the name of the file with a counter
|
||||||
|
style(f" {counter.count + 1}. {document.media} ")
|
||||||
|
highlight(article.title, *unknown_chars(article.title))
|
||||||
|
# Copy the document from its SPIP location to the new location
|
||||||
|
copyfile(config.data_dir + "/" + document.file, document.get_slug())
|
||||||
|
# Print the outputted file’s path once the file has been copied
|
||||||
|
style(" -->", BO, B)
|
||||||
|
print(f" {articledir}/{document.get_slug()}")
|
||||||
# Print the outputted file’s path when finished exporting the article
|
# Print the outputted file’s path when finished exporting the article
|
||||||
print(f" {BOLD}{G}-->{RESET} {articlepath}")
|
style(" --> ", BO, Y)
|
||||||
|
print(articlepath)
|
||||||
# Print the outputted file’s path when finished exporting the section
|
# Print the outputted file’s path when finished exporting the section
|
||||||
print(f"{BOLD}{B}-->{RESET} {sectionpath}\n")
|
style(" --> ", BO, G)
|
||||||
|
print(sectionpath)
|
||||||
|
print()
|
||||||
# Decrement export limit with length of exported section
|
# Decrement export limit with length of exported section
|
||||||
toexport -= len(articles)
|
toexport -= len(articles)
|
||||||
|
|
||||||
@ -105,16 +143,14 @@ if __name__ == "__main__": # Following is executed only if script is directly e
|
|||||||
unknown_chars_apparitions: list[str] = get_unknown_chars(article.text)
|
unknown_chars_apparitions: list[str] = get_unknown_chars(article.text)
|
||||||
nb: int = len(unknown_chars_apparitions)
|
nb: int = len(unknown_chars_apparitions)
|
||||||
s: str = "s" if nb > 1 else ""
|
s: str = "s" if nb > 1 else ""
|
||||||
print(
|
style(f"\n{nb}")
|
||||||
f"\n{BOLD}{nb}{RESET} unknown character{s} in {BOLD}{article.lang}{RESET} "
|
print(f" unknown character{s} in")
|
||||||
+ highlight_unknown_chars(article.title, R, RESET)
|
style(f" {article.lang} ")
|
||||||
)
|
highlight(article.title, *unknown_chars(article.title))
|
||||||
# Print the context in which the unknown characters are found
|
# Print the context in which the unknown characters are found
|
||||||
for text in unknown_chars_apparitions:
|
for text in unknown_chars_apparitions:
|
||||||
print(
|
style(" … ")
|
||||||
f" {BOLD}…{RESET} "
|
highlight(text, *unknown_chars(text))
|
||||||
+ highlight_unknown_chars(text, R, RESET)
|
style(" … ")
|
||||||
+ f" {BOLD}…{RESET}"
|
|
||||||
)
|
|
||||||
|
|
||||||
db.close() # Close the connection with the database
|
db.close() # Close the connection with the database
|
||||||
|
Loading…
Reference in New Issue
Block a user