From bc74fb0bfb83734b86fddf4e6e27fb363f32988b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilhem=20Faur=C3=A9?= Date: Thu, 1 Jun 2023 09:39:09 +0200 Subject: [PATCH] get only articles of the same lang than the section. commenting & cleaning --- spip2md/config.py | 33 +++++++++++++++++---------------- spip2md/extended_models.py | 12 ++++++------ spip2md/lib.py | 10 ++++------ 3 files changed, 27 insertions(+), 28 deletions(-) diff --git a/spip2md/config.py b/spip2md/config.py index b48736a..937bd6b 100644 --- a/spip2md/config.py +++ b/spip2md/config.py @@ -15,22 +15,23 @@ def config_file() -> Optional[str]: class Configuration: - db: str = "spip" - db_host: str = "localhost" - db_user: str = "spip" - db_pass: str = "password" - output_dir: str = "output/" - data_dir: str = "data/" - clear_output: bool = False - clear_log: bool = True - prepend_h1: bool = True - export_filetype: str = "md" - logfile: str = "spip2md.log" - loglevel: str = "WARNING" - unknown_char_replacement: str = "??" - alternative_languages = ("fr", "en", "es") - # max_articles_export: int = 1000 # TODO reimplement with recursion - # max_sections_export: int = 500 # TODO reimplement with recursion + db: str = "spip" # DB name + db_host: str = "localhost" # Where is the DB + db_user: str = "spip" # A DB user with read access to SPIP database + db_pass: str = "password" # Password of db_user + output_dir: str = "output/" # The directory to which DB will be exported + data_dir: str = "data/" # The directory in which SPIP images & documents are stored + prepend_h1: bool = True # Add the title of the article as a Markdown h1 + unknown_char_replacement: str = "??" 
# Replaces unknown characters + export_languages = ("fr", "en") # Languages that will be exported + export_filetype: str = "md" # Extension of exported text files + clear_output: bool = False # Remove any existing output dir before running + clear_log: bool = False # Clear log before every run instead of appending to it + logfile: str = "spip2md.log" # File where logs will be written, relative to wd + loglevel: str = "WARNING" # Minimum severity of logs written to logfile + remove_html: bool = True # Whether spip2md should remove all HTML tags + max_articles_export: int = 1000 # TODO reimplement + max_sections_export: int = 500 # TODO reimplement def __init__(self, config_file: Optional[str] = None): if config_file is not None: diff --git a/spip2md/extended_models.py b/spip2md/extended_models.py index 0742e61..0d08e7f 100644 --- a/spip2md/extended_models.py +++ b/spip2md/extended_models.py @@ -74,8 +74,8 @@ class NormalizedSection(SpipNormalized, SpipRubriques): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.obj_id = self.id_rubrique - self.depth = self.profondeur + self.obj_id = self.id_rubrique.cast(as_type="int") + self.depth = self.profondeur.cast(as_type="int") class NormalizedArticle(SpipNormalized, SpipArticles): @@ -84,7 +84,7 @@ class NormalizedArticle(SpipNormalized, SpipArticles): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.obj_id = self.id_article + self.obj_id = self.id_article.cast(as_type="int") class NormalizedDocument(SpipNormalized, SpipDocuments): @@ -93,7 +93,7 @@ class NormalizedDocument(SpipNormalized, SpipDocuments): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.obj_id = self.id_document + self.obj_id = self.id_document.cast(as_type="int") class WritableObject(SpipNormalized): @@ -515,7 +515,7 @@ class Section(RedactionalObject, NormalizedSection): def articles(self) -> list[Article]: return ( Article.select() - .where(Article.id_rubrique == self.obj_id) +
.where((Article.id_rubrique == self.obj_id) & (Article.lang == self.lang)) .order_by(Article.date.desc()) # .limit(limit) ) @@ -551,7 +551,7 @@ class Section(RedactionalObject, NormalizedSection): output.append(write_loop(documents)) # Get all child section of this section - child_sections: list[Section] = ( + child_sections: tuple[Section, ...] = ( Section.select() .where(Section.id_parent == self.obj_id) .order_by(Section.date.desc()) diff --git a/spip2md/lib.py b/spip2md/lib.py index 4f40dcd..781e17e 100644 --- a/spip2md/lib.py +++ b/spip2md/lib.py @@ -4,8 +4,6 @@ from os import makedirs, remove from os.path import isfile from shutil import rmtree -from peewee import ModelSelect - from spip2md.config import CFG from spip2md.extended_models import Section from spip2md.spip_models import DB @@ -29,7 +27,7 @@ as database user {esc(BOLD)}{CFG.db_user}{esc()} """ ) # Get all sections of parentID ROOTID - child_sections: list[Section] = ( + child_sections: tuple[Section, ...] = ( Section.select() .where(Section.id_parent == ROOTID) .order_by(Section.date.desc()) @@ -88,7 +86,7 @@ def clear_output() -> None: makedirs(CFG.output_dir, exist_ok=True) -# To execute when script is directly executed as a script +# When directly executed as a script def cli(): # def cli(*addargv: str): # import sys @@ -101,8 +99,8 @@ def cli(): # else: # sections_export = CFG.max_sections_export - init_logging() - clear_output() + init_logging() # Initialize logging and logfile + clear_output() # Possibly remove already existing output dir # Connect to the MySQL database with Peewee ORM DB.init(CFG.db, host=CFG.db_host, user=CFG.db_user, password=CFG.db_pass)