Start a big refactor to properly structure main.py, in order to export sections' documents the same way as articles

2023-05-23 15:32:53 +02:00 · 2023-05-23 15:32:53 +02:00 · bf6b8d4fe5
commit bf6b8d4fe5
parent fdd25f3de6
1 changed files with 141 additions and 105 deletions
--- a/spip2md/main.py
+++ b/spip2md/main.py
@ -8,7 +8,15 @@ from sys import argv
 from config import config
 from converter import get_unknown_chars, unknown_chars
 from database import db
-from items import Article, Sections
+from items import (
    Article,
    Articles,
    Document,
    Documents,
    LimitCounter,
    Section,
    Sections,
 )
 # Print a stylized string, without trailing newline
@ -51,129 +59,157 @@ def highlight(string: str, *start_stop: tuple[int, int]) -> None:
 # Initialize the `db` handle with the database name, host & credentials read
 # from `config`, then open the connection (runs at module level)
 db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
 db.connect()
-if __name__ == "__main__":  # Only if script is directly executed
+
 # Output information about ongoing export & write section to output destination
 def write_section(
    section: Section, counter: LimitCounter
 ) -> tuple[Articles, Documents, str]:
    """Print export progress for a section and write its index file.

    The section directory is ``config.output_dir`` + "/" + the section slug;
    it is created if missing, and the section content is written inside it
    under ``section.get_filename()``, encoded as UTF-8.

    Args:
        section: The section to export.
        counter: Counter paired with the section; its ``count`` and
            ``remaining()`` drive the progress messages.

    Returns:
        A tuple ``(section.get_articles(), section.get_documents(),
        sectiondir)`` where ``sectiondir`` is the directory that was written.
    """
    # Print the rank & title of the section, highlighting unknown characters
    style(f"{counter.count + 1}. ", BO)
    highlight(section.title, *unknown_chars(section.title))
    # Print the number of sections left to export
    if counter.remaining() > 2:
        style(f"   {counter.remaining()-1}", BO, G)
        style(" sections")
        print(" left to export", end="")
    # NOTE(review): `toexport` is a module-level name that the visible code
    # only assigns under the `__main__` guard, so calling this function from
    # an importing module would presumably raise NameError here
    if toexport > 1:
        style(f"   {toexport}", BO, Y)
        style(" articles")
        print(" left before export limit", end="")
    print()
    # Define the section's path (directory) & create directory(ies) if needed
    sectiondir: str = config.output_dir + "/" + section.get_slug()
    makedirs(sectiondir, exist_ok=True)
    # Define the section filename & write the index at that filename
    sectionpath: str = sectiondir + "/" + section.get_filename()
    # Explicit UTF-8: the default encoding of open() depends on the locale, so
    # non-ASCII content could fail or be mis-encoded on some platforms
    with open(sectionpath, "w", encoding="utf-8") as f:
        f.write(section.get_content())
    # Return the section's articles, its documents & the directory written to
    return (section.get_articles(), section.get_documents(), sectiondir)
 # Output information about ongoing export & write article to output destination
 def write_article(
    article: Article, counter: LimitCounter, sectiondir: str
 ) -> tuple[Documents, str]:
    """Print export progress for an article and write it inside its section.

    The article directory is ``sectiondir`` + "/" + the article slug; it is
    created if missing, and the article content is written inside it under
    ``article.get_filename()``, encoded as UTF-8.

    Args:
        article: The article to export.
        counter: Counter paired with the article; its ``count`` and
            ``remaining()`` drive the progress messages.
        sectiondir: Directory of the section the article is written into.

    Returns:
        A tuple ``(article.get_documents(), articledir)`` where
        ``articledir`` is the directory that was written.
    """
    # Print the remaining number of articles to export every 100 articles
    if counter.count % 100 == 0:
        s: str = "s" if counter.remaining() > 1 else ""
        print("  Exporting", end="")
        style(f" {counter.remaining()}", BO, Y)
        print(" SPIP", end="")
        style(f" article{s}")
        print(" to Markdown & YAML files")
    # Print the rank, an EMPTY marker if the text is empty, & the language
    style(
        f"  {counter.count + 1}. "
        + ("EMPTY " if len(article.text) < 1 else "")
        + f"{article.lang} "
    )
    # Print the title of the article, highlighting unknown characters
    highlight(article.title, *unknown_chars(article.title))
    print()
    # Define the full article path & create directory(ies) if needed
    articledir: str = sectiondir + "/" + article.get_slug()
    makedirs(articledir, exist_ok=True)
    # Define the article filename & write the article at the filename
    articlepath: str = articledir + "/" + article.get_filename()
    # Explicit UTF-8: the default encoding of open() depends on the locale, so
    # non-ASCII content could fail or be mis-encoded on some platforms
    with open(articlepath, "w", encoding="utf-8") as f:
        f.write(article.get_content())
    # Store articles with unknown characters
    # NOTE(review): `unknown_chars_articles` is a module-level list that the
    # visible code only creates under the `__main__` guard — confirm callers
    if len(get_unknown_chars(article.text)) > 0:
        unknown_chars_articles.append(article)
    return (article.get_documents(), articledir)
 # Output information about ongoing export & copy document to output destination
 def write_document(document: Document, counter: LimitCounter, objectdir: str) -> None:
    """Print export progress for a document and copy its file into objectdir.

    When ``counter.count`` is a multiple of 100, a summary line showing
    ``counter.remaining()`` is printed first. The source file is looked up
    under ``config.data_dir`` (with ``~`` expanded) and copied to
    ``objectdir`` under the document's slug. A missing file is reported on
    screen instead of raising.

    Args:
        document: The document whose file is copied.
        counter: Counter paired with the document.
        objectdir: Destination directory of the copy.
    """
    at_batch_start: bool = counter.count % 100 == 0
    if at_batch_start:
        plural: str = "" if counter.remaining() <= 1 else "s"
        print("    Exporting", end="")
        style(f" {counter.remaining()}", BO, B)
        style(f" document{plural}")
        print(" in this article")
    # Numbered line: media type, optional title, then the file location
    rank = counter.count + 1
    style(f"    {rank}. {document.media} ")
    if len(document.title) >= 1:
        highlight(document.title + " ", *unknown_chars(document.title))
    style("at ")
    print(document.file)
    # Location of the file to copy, inside the configured data directory
    source: str = expanduser("/".join((config.data_dir, document.file)))
    try:
        copyfile(source, "/".join((objectdir, document.get_slug())))
    except FileNotFoundError:
        # Report the missing file & stop here, nothing was copied
        style("    NOT FOUND: ", BO, R)
        print(source)
        return
    # The copy succeeded: print where the file has been written
    style("    -->", BO, B)
    print(f" {objectdir}/{document.get_slug()}")
 # Return true if an article field contains an unknown character
 def has_unknown_chars(article: Article) -> bool:
    """Return True if the article's text contains an unknown character.

    Args:
        article: The article whose ``text`` is inspected.

    Returns:
        True when ``get_unknown_chars(article.text)`` finds at least one
        occurrence, False otherwise.
    """
    # Same check as the one write_article() & warn_unknown_chars() rely on,
    # so an article is only reported when there is something to show
    return len(get_unknown_chars(article.text)) > 0
 # Print the detected unknown chars in article in their context but highlighted
 def warn_unknown_chars(article: Article) -> None:
    """Print the unknown characters found in an article's text, in context.

    A header line gives the number of occurrences, the article language and
    its title; each occurrence returned by ``get_unknown_chars`` is then
    printed with its unknown characters highlighted.

    Args:
        article: The article to report on.
    """
    contexts: list[str] = get_unknown_chars(article.text)
    count: int = len(contexts)
    # Header: number of unknown characters, then language & title
    style(f"{count}")
    plural: str = "" if count <= 1 else "s"
    print(f" unknown character{plural} in", end="")
    style(f" {article.lang} ")
    highlight(article.title, *unknown_chars(article.title))
    print()  # End of the header line
    # One line per occurrence, surrounded by ellipses
    for context in contexts:
        style("  … ")
        highlight(context, *unknown_chars(context))
        style(" … \n")
    print()  # Blank line after the report
 # Main loop to execute only if script is directly executed
 if __name__ == "__main__":
    # Max number of articles to export: the first CLI argument (as an int)
    # if one is given, else config.default_export_max
    if len(argv) >= 2:
        toexport = int(argv[1])
    else:
        toexport = config.default_export_max
    # Optionally remove the previous output, then make sure the output
    # directory exists
    if config.clear_output:
        # NOTE(review): the second argument presumably is shutil.rmtree's
        # ignore_errors flag — confirm against the file's imports
        rmtree(config.output_dir, True)
    makedirs(config.output_dir, exist_ok=True)
    # NOTE(review): from here on this listing interleaves removed ("-") and
    # added ("+") diff lines with unchanged ones; read it as a diff, not as
    # runnable code
-    # Articles that contains unknown chars
+    # Make a list containing articles where unknown characters are detected
    unknown_chars_articles: list[Article] = []
    # Loop among first maxexport articles & export them
-    for section, counter in Sections():
+    for section, counter in Sections(toexport):
-        # Define articles of the sections, limited by toexport
+        # Write the section & store its articles
-        if toexport <= 0:
+        articles, documents, sectiondir = write_section(section, counter)
-            break
+        # Loop over section’s related files (images …)
-        articles = section.get_articles(toexport)
+        for document, counter in documents:
-        # Print the name of the exported section & number of remaining sections
+            write_document(document, counter, sectiondir)
        style(f"{counter.count + 1}. ", BO)
        highlight(section.title, *unknown_chars(section.title))
        if counter.remaining() > 2:
            style(f"   {counter.remaining()-1}", BO, G)
            style(" sections")
            print(" left to export", end="")
        if toexport > 1:
            style(f"   {toexport}", BO, Y)
            style(" articles")
            print(" left before export limit", end="")
        print()
        # Define the section’s path (directory) & create directory(ies) if needed
        sectiondir: str = config.output_dir + "/" + section.get_slug()
        makedirs(sectiondir, exist_ok=True)
        # Define the section filename & write the index at that filename
        sectionpath: str = sectiondir + "/" + section.get_filename()
        with open(sectionpath, "w") as f:
            f.write(section.get_content())
        # Loop over section’s articles
        for article, counter in articles:
-            # Print the remaining number of articles to export every 100 articles
+            documents, articledir = write_article(article, counter, sectiondir)
-            if counter.count % 100 == 0:
+            # Add article to unknown_chars_articles if needed
-                s: str = "s" if counter.remaining() > 1 else ""
+            if has_unknown_chars(article):
                print("  Exporting", end="")
                style(f" {counter.remaining()}", BO, Y)
                print(" SPIP", end="")
                style(f" article{s}")
                print(" to Markdown & YAML files")
            # Print the title of the article being exported
            style(
                f"  {counter.count + 1}. "
                + ("EMPTY " if len(article.text) < 1 else "")
                + f"{article.lang} "
            )
            highlight(article.title, *unknown_chars(article.title))
            print()
            # Define the full article path & create directory(ies) if needed
            articledir: str = sectiondir + "/" + article.get_slug()
            makedirs(articledir, exist_ok=True)
            # Define the article filename & write the article at the filename
            articlepath: str = articledir + "/" + article.get_filename()
            with open(articlepath, "w") as f:
                f.write(article.get_content())
            # Store articles with unknown characters
            if len(get_unknown_chars(article.text)) > 0:
                unknown_chars_articles.append(article)
            # Loop over article’s related files (images …)
-            for document, counter in article.get_documents():
+            for document, counter in documents:
-                if counter.count % 100 == 0:
+                write_document(document, counter, articledir)
-                    s: str = "s" if counter.remaining() > 1 else ""
+        # Break 2 lines when finished exporting the section
-                    print("    Exporting", end="")
+        print("\n")
                    style(f" {counter.remaining()}", BO, B)
                    style(f" document{s}")
                    print(" in this article")
                # Print the name of the file with a counter
                style(f"    {counter.count + 1}. {document.media} ")
                if len(document.title) > 0:
                    highlight(document.title + " ", *unknown_chars(document.title))
                style("at ")
                print(document.file)
                # Define document path
                documentpath: str = expanduser(config.data_dir + "/" + document.file)
                # Copy the document from it’s SPIP location to the new location
                try:
                    copyfile(documentpath, articledir + "/" + document.get_slug())
                except FileNotFoundError:
                    style("    NOT FOUND: ", BO, R)
                    print(documentpath)
                else:
                    # Print the outputted file’s path when copied the file
                    style("    -->", BO, B)
                    print(f" {articledir}/{document.get_slug()}")
            # Print the outputted file’s path when finished exporting the article
            style("  --> ", BO, Y)
            print(articlepath)
        # Print the outputted file’s path when finished exporting the section
        style("--> ", BO, G)
        print(sectionpath)
        print()
        # Decrement export limit with length of exported section
        toexport -= len(articles)
    print()  # Break line
    # Loop through each article that contains an unknown character
    for article in unknown_chars_articles:
-        # Print the title of the article in which there is unknown characters
+        warn_unknown_chars(article)
        # & the number of them
        unknown_chars_apparitions: list[str] = get_unknown_chars(article.text)
        nb: int = len(unknown_chars_apparitions)
        s: str = "s" if nb > 1 else ""
        style(f"{nb}")
        print(f" unknown character{s} in", end="")
        style(f" {article.lang} ")
        highlight(article.title, *unknown_chars(article.title))
        print()  # Break line
        # Print the context in which the unknown characters are found
        for text in unknown_chars_apparitions:
            style("  … ")
            highlight(text, *unknown_chars(text))
            style(" … \n")
        print()  # Break line
    db.close()  # Close the connection with the database