Start a big refactor to properly structure main.py, in order to export sections' documents the same way as articles
This commit is contained in:
parent
fdd25f3de6
commit
bf6b8d4fe5
246
spip2md/main.py
246
spip2md/main.py
@ -8,7 +8,15 @@ from sys import argv
|
|||||||
from config import config
|
from config import config
|
||||||
from converter import get_unknown_chars, unknown_chars
|
from converter import get_unknown_chars, unknown_chars
|
||||||
from database import db
|
from database import db
|
||||||
from items import Article, Sections
|
from items import (
|
||||||
|
Article,
|
||||||
|
Articles,
|
||||||
|
Document,
|
||||||
|
Documents,
|
||||||
|
LimitCounter,
|
||||||
|
Section,
|
||||||
|
Sections,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# Print a stylized string, without trailing newline
|
# Print a stylized string, without trailing newline
|
||||||
@ -51,129 +59,157 @@ def highlight(string: str, *start_stop: tuple[int, int]) -> None:
|
|||||||
db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
|
db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
|
||||||
db.connect()
|
db.connect()
|
||||||
|
|
||||||
if __name__ == "__main__": # Only if script is directly executed
|
|
||||||
|
# Output information about ongoing export & write section to output destination
|
||||||
|
def write_section(
    section: Section, counter: LimitCounter
) -> tuple[Articles, Documents, str]:
    """Print export progress for a section and write its index file.

    The section directory is created under ``config.output_dir`` (named after
    ``section.get_slug()``) and the result of ``section.get_content()`` is
    written in it, at the filename given by ``section.get_filename()``.

    Args:
        section: The section to export.
        counter: Counter paired with the section by the caller's iteration;
            its ``count`` and ``remaining()`` drive the progress output.

    Returns:
        A tuple ``(articles, documents, sectiondir)`` holding the values of
        ``section.get_articles()`` and ``section.get_documents()``, and the
        path of the directory the section was written to.
    """
    # Print the name of the exported section & number of remaining sections
    style(f"{counter.count + 1}. ", BO)
    highlight(section.title, *unknown_chars(section.title))
    if counter.remaining() > 2:
        style(f" {counter.remaining()-1}", BO, G)
        style(" sections")
        print(" left to export", end="")
    # `toexport` is only bound at module level when the file runs as a script,
    # so look it up defensively instead of raising NameError when imported
    articles_left = globals().get("toexport", 0)
    if articles_left > 1:
        style(f" {articles_left}", BO, Y)
        style(" articles")
        print(" left before export limit", end="")
    print()
    # Define the section’s path (directory) & create directory(ies) if needed
    sectiondir: str = config.output_dir + "/" + section.get_slug()
    makedirs(sectiondir, exist_ok=True)
    # Define the section filename & write the index at that filename
    sectionpath: str = sectiondir + "/" + section.get_filename()
    # Explicit encoding: the output must not depend on the platform's locale
    with open(sectionpath, "w", encoding="utf-8") as f:
        f.write(section.get_content())
    # Return the section's articles, its documents & the directory written to
    return (section.get_articles(), section.get_documents(), sectiondir)
|
||||||
|
|
||||||
|
|
||||||
|
# Output information about ongoing export & write article to output destination
|
||||||
|
def write_article(
    article: Article, counter: LimitCounter, sectiondir: str
) -> tuple[Documents, str]:
    """Print export progress for an article and write it to its own directory.

    The article directory is created inside ``sectiondir`` (named after
    ``article.get_slug()``) and the result of ``article.get_content()`` is
    written in it, at the filename given by ``article.get_filename()``.

    Recording articles that contain unknown characters is left to the caller:
    the main loop already appends to ``unknown_chars_articles``, so appending
    here as well listed the same article twice (and tied this function to a
    global that only exists when the file runs as a script).

    Args:
        article: The article to export.
        counter: Counter paired with the article by the caller's iteration;
            its ``count`` and ``remaining()`` drive the progress output.
        sectiondir: Directory of the section the article belongs to.

    Returns:
        A tuple ``(documents, articledir)`` holding the value of
        ``article.get_documents()`` and the path of the article's directory.
    """
    # Print the remaining number of articles to export every 100 articles
    if counter.count % 100 == 0:
        s: str = "s" if counter.remaining() > 1 else ""
        print(" Exporting", end="")
        style(f" {counter.remaining()}", BO, Y)
        print(" SPIP", end="")
        style(f" article{s}")
        print(" to Markdown & YAML files")
    # Print the title of the article being exported
    style(
        f" {counter.count + 1}. "
        + ("EMPTY " if len(article.text) < 1 else "")
        + f"{article.lang} "
    )
    highlight(article.title, *unknown_chars(article.title))
    print()
    # Define the full article path & create directory(ies) if needed
    articledir: str = sectiondir + "/" + article.get_slug()
    makedirs(articledir, exist_ok=True)
    # Define the article filename & write the article at the filename
    articlepath: str = articledir + "/" + article.get_filename()
    # Explicit encoding: the output must not depend on the platform's locale
    with open(articlepath, "w", encoding="utf-8") as f:
        f.write(article.get_content())
    return (article.get_documents(), articledir)
|
||||||
|
|
||||||
|
|
||||||
|
# Output information about ongoing export & copy document to output destination
|
||||||
|
def write_document(document: Document, counter: LimitCounter, objectdir: str) -> None:
    """Print export progress for a document and copy it into ``objectdir``.

    The source file is looked up under ``config.data_dir`` (with ``~``
    expanded) and copied to ``objectdir`` under the name returned by
    ``document.get_slug()``. A missing source file is reported on the
    terminal instead of raising.

    Args:
        document: The document to copy.
        counter: Counter paired with the document by the caller's iteration.
        objectdir: Destination directory (of the owning section or article).
    """
    # Announce the number of remaining documents every 100 documents
    if not counter.count % 100:
        plural: str = "" if counter.remaining() <= 1 else "s"
        print(" Exporting", end="")
        style(f" {counter.remaining()}", BO, B)
        style(f" document{plural}")
        print(" in this article")
    # Numbered line describing the file: media type, optional title, location
    style(f" {counter.count + 1}. {document.media} ")
    if len(document.title) > 0:
        highlight(document.title + " ", *unknown_chars(document.title))
    style("at ")
    print(document.file)
    # Where the document currently lives in the SPIP data directory
    source: str = expanduser(config.data_dir + "/" + document.file)
    try:
        copyfile(source, objectdir + "/" + document.get_slug())
    except FileNotFoundError:
        # Report the missing source & stop here, nothing was copied
        style(" NOT FOUND: ", BO, R)
        print(source)
        return
    # Copy succeeded: show where the file ended up
    style(" -->", BO, B)
    print(f" {objectdir}/{document.get_slug()}")
|
||||||
|
|
||||||
|
|
||||||
|
# Return true if an article field contains an unknown character
|
||||||
|
def has_unknown_chars(article: Article) -> bool:
    """Return True if the article's text contains an unknown character.

    Uses the same check as the rest of this file: ``get_unknown_chars`` applied
    to ``article.text``. The previous body unconditionally returned True, which
    made the caller flag every exported article.

    NOTE(review): only the text is inspected, not the title — confirm whether
    other fields should be checked too.
    """
    return len(get_unknown_chars(article.text)) > 0
|
||||||
|
|
||||||
|
|
||||||
|
# Print the detected unknown chars in article in their context but highlighted
|
||||||
|
def warn_unknown_chars(article: Article) -> None:
    """Print the unknown characters found in an article's text, in context.

    A header line gives the number of occurrences, the article language and
    its title; it is followed by one highlighted excerpt per occurrence, as
    returned by ``get_unknown_chars(article.text)``.
    """
    excerpts: list[str] = get_unknown_chars(article.text)
    total: int = len(excerpts)
    plural: str = "" if total <= 1 else "s"
    # Header: count, language & title of the offending article
    style(f"{total}")
    print(f" unknown character{plural} in", end="")
    style(f" {article.lang} ")
    highlight(article.title, *unknown_chars(article.title))
    print()  # End the header line
    # One line per occurrence, with the unknown characters highlighted
    for excerpt in excerpts:
        style(" … ")
        highlight(excerpt, *unknown_chars(excerpt))
        style(" … \n")
    print()  # Blank line after the article's report
|
||||||
|
|
||||||
|
|
||||||
|
# Main loop to execute only if script is directly executed
|
||||||
|
if __name__ == "__main__":
    # Everything runs inside try/finally so the database connection is closed
    # even when the export stops on an error
    try:
        # Define max nb of articles to export based on first CLI argument
        if len(argv) >= 2:
            toexport = int(argv[1])
        else:
            toexport = config.default_export_max

        # Clear the output dir & create a new
        if config.clear_output:
            rmtree(config.output_dir, True)
        makedirs(config.output_dir, exist_ok=True)

        # Make a list containing articles where unknown characters are detected
        unknown_chars_articles: list[Article] = []

        # Loop among first maxexport articles & export them
        for section, section_counter in Sections(toexport):
            # Write the section & store its articles
            articles, section_documents, sectiondir = write_section(
                section, section_counter
            )
            # Loop over section’s related files (images …)
            for document, document_counter in section_documents:
                write_document(document, document_counter, sectiondir)
            # Loop over section’s articles
            for article, article_counter in articles:
                article_documents, articledir = write_article(
                    article, article_counter, sectiondir
                )
                # Add article to unknown_chars_articles if needed
                if has_unknown_chars(article):
                    unknown_chars_articles.append(article)
                # Loop over article’s related files (images …)
                for document, document_counter in article_documents:
                    write_document(document, document_counter, articledir)
            # Break 2 lines when finished exporting the section
            print("\n")

        # Loop through each article that contains an unknown character
        for article in unknown_chars_articles:
            warn_unknown_chars(article)
    finally:
        db.close()  # Close the connection with the database
|
||||||
|
Loading…
Reference in New Issue
Block a user