Start a big refactor to properly structure main.py, in order to export sections’ documents the same way as articles

2023-05-23 15:32:53 +02:00 · 2023-05-23 15:32:53 +02:00 · bf6b8d4fe5
commit bf6b8d4fe5
parent fdd25f3de6
1 changed files with 141 additions and 105 deletions
--- a/spip2md/main.py
+++ b/spip2md/main.py
@ -8,7 +8,15 @@ from sys import argv
 from config import config
 from converter import get_unknown_chars, unknown_chars
 from database import db
-from items import Article, Sections
+from items import (
+    Article,
+    Articles,
+    Document,
+    Documents,
+    LimitCounter,
+    Section,
+    Sections,
+)


 # Print a stylized string, without trailing newline
@ -51,27 +59,11 @@ def highlight(string: str, *start_stop: tuple[int, int]) -> None:
 db.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass)
 db.connect()

-if __name__ == "__main__":  # Only if script is directly executed
-    # Define max nb of articles to export based on first CLI argument
-    if len(argv) >= 2:
-        toexport = int(argv[1])
-    else:
-        toexport = config.default_export_max

-    if config.clear_output:
-        # Clear the output dir & create a new
-        rmtree(config.output_dir, True)
-    makedirs(config.output_dir, exist_ok=True)
-
-    # Articles that contains unknown chars
-    unknown_chars_articles: list[Article] = []
-
-    # Loop among first maxexport articles & export them
-    for section, counter in Sections():
-        # Define articles of the sections, limited by toexport
-        if toexport <= 0:
-            break
-        articles = section.get_articles(toexport)
+# Output information about ongoing export & write section to output destination
+def write_section(
+    section: Section, counter: LimitCounter
+) -> tuple[Articles, Documents, str]:
    # Print the name of the exported section & number of remaining sections
    style(f"{counter.count + 1}. ", BO)
    highlight(section.title, *unknown_chars(section.title))
@ -91,8 +83,14 @@ if __name__ == "__main__":  # Only if script is directly executed
    sectionpath: str = sectiondir + "/" + section.get_filename()
    with open(sectionpath, "w") as f:
        f.write(section.get_content())
-        # Loop over section’s articles
-        for article, counter in articles:
+    # Return the section’s articles, its documents & its output directory
+    return (section.get_articles(), section.get_documents(), sectiondir)
+
+
+# Output information about ongoing export & write article to output destination
+def write_article(
+    article: Article, counter: LimitCounter, sectiondir: str
+) -> tuple[Documents, str]:
    # Print the remaining number of articles to export every 100 articles
    if counter.count % 100 == 0:
        s: str = "s" if counter.remaining() > 1 else ""
@ -119,8 +117,11 @@ if __name__ == "__main__":  # Only if script is directly executed
    # Store articles with unknown characters
    if len(get_unknown_chars(article.text)) > 0:
        unknown_chars_articles.append(article)
-            # Loop over article’s related files (images …)
-            for document, counter in article.get_documents():
+    return (article.get_documents(), articledir)
+
+
+# Output information about ongoing export & copy document to output destination
+def write_document(document: Document, counter: LimitCounter, objectdir: str) -> None:
    if counter.count % 100 == 0:
        s: str = "s" if counter.remaining() > 1 else ""
        print("    Exporting", end="")
@ -137,28 +138,23 @@ if __name__ == "__main__":  # Only if script is directly executed
    documentpath: str = expanduser(config.data_dir + "/" + document.file)
    # Copy the document from it’s SPIP location to the new location
    try:
-                    copyfile(documentpath, articledir + "/" + document.get_slug())
+        copyfile(documentpath, objectdir + "/" + document.get_slug())
    except FileNotFoundError:
        style("    NOT FOUND: ", BO, R)
        print(documentpath)
    else:
        # Print the outputted file’s path when copied the file
        style("    -->", BO, B)
-                    print(f" {articledir}/{document.get_slug()}")
-            # Print the outputted file’s path when finished exporting the article
-            style("  --> ", BO, Y)
-            print(articlepath)
-        # Print the outputted file’s path when finished exporting the section
-        style("--> ", BO, G)
-        print(sectionpath)
-        print()
-        # Decrement export limit with length of exported section
-        toexport -= len(articles)
+        print(f" {objectdir}/{document.get_slug()}")

-    print()  # Break line

-    # Loop through each article that contains an unknown character
-    for article in unknown_chars_articles:
+# Return true if an article field contains an unknown character (stub: always returns True for now)
+def has_unknown_chars(article: Article) -> bool:
+    return True
+
+
+# Print the detected unknown chars in article in their context but highlighted
+def warn_unknown_chars(article: Article) -> None:
    # Print the title of the article in which there is unknown characters
    # & the number of them
    unknown_chars_apparitions: list[str] = get_unknown_chars(article.text)
@ -176,4 +172,44 @@ if __name__ == "__main__":  # Only if script is directly executed
        style(" … \n")
    print()  # Break line

+
+# Main loop to execute only if script is directly executed
+if __name__ == "__main__":
+    # Define max nb of articles to export based on first CLI argument
+    if len(argv) >= 2:
+        toexport = int(argv[1])
+    else:
+        toexport = config.default_export_max
+
+    # Clear the output dir if configured to & make sure it exists
+    if config.clear_output:
+        rmtree(config.output_dir, True)
+    makedirs(config.output_dir, exist_ok=True)
+
+    # Make a list containing articles where unknown characters are detected
+    unknown_chars_articles: list[Article] = []
+
+    # Loop among first maxexport articles & export them
+    for section, counter in Sections(toexport):
+        # Write the section & store its articles
+        articles, documents, sectiondir = write_section(section, counter)
+        # Loop over section’s related files (images …)
+        for document, counter in documents:
+            write_document(document, counter, sectiondir)
+        # Loop over section’s articles
+        for article, counter in articles:
+            documents, articledir = write_article(article, counter, sectiondir)
+            # Add article to unknown_chars_articles if needed
+            if has_unknown_chars(article):
+                unknown_chars_articles.append(article)
+            # Loop over article’s related files (images …)
+            for document, counter in documents:
+                write_document(document, counter, articledir)
+        # Break 2 lines when finished exporting the section
+        print("\n")
+
+    # Loop through each article that contains an unknown character
+    for article in unknown_chars_articles:
+        warn_unknown_chars(article)
+
    db.close()  # Close the connection with the database