recursive refactor

This commit is contained in:
Guilhem Fauré 2023-05-25 16:52:34 +02:00
parent 042266a1c4
commit 861379748c
3 changed files with 92 additions and 51 deletions

View File

@ -4,11 +4,11 @@ from typing import Optional
from yaml import Loader, load from yaml import Loader, load
config_paths = ("spip2md.yml", "spip2md.yaml") CONFIG_PATHS = ("spip2md.yml", "spip2md.yaml")
def config_file() -> Optional[str]: def config_file() -> Optional[str]:
for path in config_paths: for path in CONFIG_PATHS:
if isfile(path): if isfile(path):
return path return path
@ -34,14 +34,13 @@ class Configuration:
# Assign configuration for each attribute in config file # Assign configuration for each attribute in config file
for attr in config: for attr in config:
# If attribute is a dir, ensure that ~ is converted to home path # If attribute is a dir, ensure that ~ is converted to home path
if type(attr) == "string" and "dir" in attr: if "dir" in attr:
directory = expanduser(config[attr]) directory = expanduser(config[attr])
# Ensure that directory ends with a slash # Ensure that directory ends with a slash
directory = ( directory = directory if directory[:-1] == "/" else directory + "/"
directory if directory.last() == "/" else directory + "/"
)
setattr(self, attr, directory) setattr(self, attr, directory)
else:
setattr(self, attr, config[attr]) setattr(self, attr, config[attr])
config = Configuration(config_file()) CFG = Configuration(config_file())

View File

@ -5,7 +5,7 @@ from sys import argv
from peewee import ModelSelect from peewee import ModelSelect
from config import config from config import CFG
from converters import unknown_chars, unknown_chars_context from converters import unknown_chars, unknown_chars_context
from database import DB from database import DB
from spipobjects import ( from spipobjects import (
@ -52,7 +52,7 @@ def warn_unknown_chars(article: Article) -> None:
# Connect to the MySQL database with Peewee ORM # Connect to the MySQL database with Peewee ORM
DB.init(config.db, host=config.db_host, user=config.db_user, password=config.db_pass) DB.init(CFG.db, host=CFG.db_host, user=CFG.db_user, password=CFG.db_pass)
DB.connect() DB.connect()
@ -62,29 +62,29 @@ if __name__ == "__main__":
if len(argv) >= 2: if len(argv) >= 2:
max_articles_export = int(argv[1]) max_articles_export = int(argv[1])
else: else:
max_articles_export = config.max_articles_export max_articles_export = CFG.max_articles_export
# Define max nb of sections to export based on second CLI argument # Define max nb of sections to export based on second CLI argument
if len(argv) >= 3: if len(argv) >= 3:
max_sections_export = int(argv[2]) max_sections_export = int(argv[2])
else: else:
max_sections_export = config.max_sections_export max_sections_export = CFG.max_sections_export
# Clear the output dir & create a new # Clear the output dir & create a new
if config.clear_output: if CFG.clear_output:
rmtree(config.output_dir, True) rmtree(CFG.output_dir, True)
makedirs(config.output_dir, exist_ok=True) makedirs(CFG.output_dir, exist_ok=True)
# Make a list containing articles where unknown characters are detected # Get the first max_sections_export root sections
unknown_chars_articles: list[Article] = [] sections: ModelSelect = root_sections(max_sections_export)
total: int = len(sections)
# Write each root sections with its subtree # Write each root sections with its subtree
for section in root_sections(max_sections_export): for i, section in enumerate(sections):
section.write() section.write_tree(CFG.output_dir, i, total)
print() # Break line after exporting the section print() # Break line after exporting the section
print() # Break line between export & unknown characters warning # print() # Break line between export & unknown characters warning
# Warn about each article that contains unknown(s) character(s) # Warn about each article that contains unknown(s) character(s)
for article in unknown_chars_articles: # TODO do it with Python warnings
warn_unknown_chars(article)
DB.close() # Close the connection with the database DB.close() # Close the connection with the database

View File

@ -4,11 +4,11 @@ from re import finditer
from shutil import copyfile from shutil import copyfile
from typing import Any, Optional from typing import Any, Optional
from peewee import BigAutoField, DateTimeField, Model, ModelSelect from peewee import BigAutoField, DateTimeField, ModelSelect
from slugify import slugify from slugify import slugify
from yaml import dump from yaml import dump
from config import config from config import CFG
from converters import convert, link_document, unknown_chars from converters import convert, link_document, unknown_chars
from database import ( from database import (
SpipArticles, SpipArticles,
@ -18,22 +18,20 @@ from database import (
SpipDocumentsLiens, SpipDocumentsLiens,
SpipRubriques, SpipRubriques,
) )
from styling import BLUE, BOLD, GREEN, YELLOW, highlight, indent, ss, style from styling import BLUE, BOLD, GREEN, RED, YELLOW, highlight, indent, ss, style
class SpipWritable: class SpipWritable:
class Meta:
table_name: str
term_color: int term_color: int
texte: str texte: str
lang: str lang: str
titre: str titre: str
def filename(self, date: bool = False) -> str: def filename(self, date: bool = False) -> str:
raise NotImplementedError("Subclasses need to implement filename()") raise NotImplementedError(
f"Subclasses need to implement filename(), date: {date}"
)
# Output information about file that will be exported
def begin_message( def begin_message(
self, index: int, limit: int, depth: int = 0, step: int = 100 self, index: int, limit: int, depth: int = 0, step: int = 100
) -> None: ) -> None:
@ -42,23 +40,31 @@ class SpipWritable:
indent(depth) indent(depth)
print("Exporting", end="") print("Exporting", end="")
style(f" {limit-index}", BOLD, self.term_color) style(f" {limit-index}", BOLD, self.term_color)
print(f" element{ss(limit-index)} from", end="") if hasattr(self, "profondeur"):
style(f" {self.Meta.table_name}") print(f" level {self.profondeur}", end="")
style(f" {type(self).__name__}{ss(limit-index)}\n")
# Print the counter & title of the object being exported # Print the counter & title of the object being exported
indent(depth) indent(depth)
style(f"{index + 1}. ") style(f"{index + 1}. ")
if len(self.titre) > 0:
highlight(self.titre, *unknown_chars(self.titre)) highlight(self.titre, *unknown_chars(self.titre))
else:
print("NO NAME", end="")
# + ("EMPTY " if len(self.texte) < 1 else "") # + ("EMPTY " if len(self.texte) < 1 else "")
# + f"{self.lang} " # + f"{self.lang} "
# Write object to output destination # Write object to output destination
def write(self, export_dir: str) -> None: def write(self, parent_dir: str) -> str:
raise NotImplementedError("Subclasses need to implement write()") raise NotImplementedError(
f"Subclasses need to implement write(), export dir: {parent_dir}"
)
# Output information about file that was just exported # Output information about file that was just exported
def end_message(self, export_dir: str): def end_message(self, message: str | Exception):
style(" -> ", BOLD, self.term_color) style(" -> ", BOLD, self.term_color)
print(export_dir + self.filename()) if message is Exception:
style("ERROR ", BOLD, RED)
print(message)
class Document(SpipWritable, SpipDocuments): class Document(SpipWritable, SpipDocuments):
@ -82,12 +88,13 @@ class Document(SpipWritable, SpipDocuments):
) )
# Write document to output destination # Write document to output destination
def write(self, export_dir: str) -> None: def write(self, parent_dir: str) -> str:
# Define file source and destination
src: str = CFG.data_dir + self.fichier
dest: str = parent_dir + self.filename()
# Copy the document from its SPIP location to the new location # Copy the document from its SPIP location to the new location
try: copyfile(src, dest)
copyfile(config.data_dir + self.fichier, export_dir + self.filename()) return dest
except FileNotFoundError:
raise FileNotFoundError(" -> NOT FOUND!\n") from None
class SpipObject(SpipWritable): class SpipObject(SpipWritable):
@ -112,7 +119,7 @@ class SpipObject(SpipWritable):
# Convert SPIP style internal links for images & other files into Markdown style # Convert SPIP style internal links for images & other files into Markdown style
def link_documents(self, documents: ModelSelect) -> None: def link_documents(self, documents: ModelSelect) -> None:
for d in documents: for d in documents:
self.texte = link_document(self.texte, d.id_document, d.titre, d.slug()) self.texte = link_document(self.texte, d.id_document, d.titre, d.filename())
# Output related documents & link them in the text by the way # Output related documents & link them in the text by the way
def documents(self, link_documents: bool = True) -> ModelSelect: def documents(self, link_documents: bool = True) -> ModelSelect:
@ -137,7 +144,7 @@ class SpipObject(SpipWritable):
else: else:
title: str = article.titre title: str = article.titre
self.texte = self.texte.replace( self.texte = self.texte.replace(
match.group(0), f"[{title}]({article.slug()}/{article.filename()})" match.group(0), f"[{title}]({article.dir_slug()}/{article.filename()})"
) )
# Output related articles # Output related articles
@ -157,7 +164,7 @@ class SpipObject(SpipWritable):
# Get filename of this object # Get filename of this object
def filename(self) -> str: def filename(self) -> str:
return self.prefix + "." + self.lang + "." + config.export_filetype return self.prefix + "." + self.lang + "." + CFG.export_filetype
# Get the YAML frontmatter string # Get the YAML frontmatter string
def frontmatter(self, append: Optional[dict[str, Any]] = None) -> str: def frontmatter(self, append: Optional[dict[str, Any]] = None) -> str:
@ -183,7 +190,7 @@ class SpipObject(SpipWritable):
# Start the content with frontmatter # Start the content with frontmatter
body: str = "---\n" + self.frontmatter() + "---" body: str = "---\n" + self.frontmatter() + "---"
# Add the title as a Markdown h1 # Add the title as a Markdown h1
if len(self.titre) > 0 and config.prepend_h1: if len(self.titre) > 0 and CFG.prepend_h1:
body += "\n\n# " + self.titre body += "\n\n# " + self.titre
# If there is a text, add the text preceded by two line breaks # If there is a text, add the text preceded by two line breaks
if len(self.texte) > 0: if len(self.texte) > 0:
@ -195,9 +202,17 @@ class SpipObject(SpipWritable):
return body return body
# Write object to output destination # Write object to output destination
def write(self, export_dir: str) -> None: def write(self, parent_dir: str) -> str:
with open(export_dir + self.filename(), "w") as f: # Define actual export directory
directory: str = self.dir_slug() + parent_dir
# Make a directory for this object if there isnt
makedirs(directory, exist_ok=True)
# Define actual export path
path: str = directory + self.filename()
# Write the content of this object into a file named as self.filename()
with open(path, "w") as f:
f.write(self.content()) f.write(self.content())
return path
class Article(SpipObject, SpipArticles): class Article(SpipObject, SpipArticles):
@ -213,7 +228,7 @@ class Article(SpipObject, SpipArticles):
self.ps: str = convert(self.ps) # Probably unused self.ps: str = convert(self.ps) # Probably unused
self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false" self.accepter_forum: str = "true" if self.accepter_forum == "oui" else "false"
# ID # ID
self.id = self.id_article self.object_id = self.id_article
# Terminal output color # Terminal output color
self.term_color = YELLOW self.term_color = YELLOW
@ -264,7 +279,7 @@ class Rubrique(SpipObject, SpipRubriques):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
# ID # ID
self.id = self.id_rubrique self.object_id = self.id_rubrique
# File prefix # File prefix
self.prefix = "_index" self.prefix = "_index"
# Terminal output color # Terminal output color
@ -281,9 +296,36 @@ class Rubrique(SpipObject, SpipRubriques):
else: else:
return dump(super().frontmatter(meta), allow_unicode=True) return dump(super().frontmatter(meta), allow_unicode=True)
def write_tree(self): def write_tree(self, parent_dir: str, index: int, total: int):
self.begin_message(index, total, int(self.profondeur))
# Get this sections articles documents
articles = self.articles()
documents = self.documents()
# Write this section
self.link_articles()
export_path: str = self.write(parent_dir)
self.end_message(export_path)
# Write this sections articles and documents
def write_loop(objects: ModelSelect):
total = len(objects)
for i, obj in enumerate(objects):
obj.begin_message(i, total, self.profondeur + 1)
try:
export_path: str = obj.write(parent_dir)
obj.end_message(export_path)
except Exception as err:
obj.end_message(err)
write_loop(articles)
write_loop(documents)
# Get all child section of self
child_sections = ( child_sections = (
Rubrique.select() Rubrique.select()
.where(Rubrique.id_parent == self.id_rubrique) .where(Rubrique.id_parent == self.id_rubrique)
.order_by(Rubrique.date.desc()) .order_by(Rubrique.date.desc())
) )
# Do the same for subsections (write their entire subtree)
for i, s in enumerate(child_sections):
s.write_tree(parent_dir + self.dir_slug(), i, total)