get only articles of the same lang as the section. commenting & cleaning

This commit is contained in:
Guilhem Fauré 2023-06-01 09:39:09 +02:00
parent fe71b8fea3
commit bc74fb0bfb
3 changed files with 27 additions and 28 deletions

View File

@ -15,22 +15,23 @@ def config_file() -> Optional[str]:
class Configuration:
db: str = "spip"
db_host: str = "localhost"
db_user: str = "spip"
db_pass: str = "password"
output_dir: str = "output/"
data_dir: str = "data/"
clear_output: bool = False
clear_log: bool = True
prepend_h1: bool = True
export_filetype: str = "md"
logfile: str = "spip2md.log"
loglevel: str = "WARNING"
unknown_char_replacement: str = "??"
alternative_languages = ("fr", "en", "es")
# max_articles_export: int = 1000 # TODO reimplement with recursion
# max_sections_export: int = 500 # TODO reimplement with recursion
db: str = "spip" # DB name
db_host: str = "localhost" # Where is the DB
db_user: str = "spip" # A DB user with read access to SPIP database
db_pass: str = "password" # Password of db_user
output_dir: str = "output/" # The directory to which DB will be exported
data_dir: str = "data/" # The directory in which SPIP images & documents are stored
prepend_h1: bool = True # Add the title of the article as a Markdown h1
unknown_char_replacement: str = "??" # Replaces unknown characters
export_languages = ("fr", "en") # Languages that will be exported
export_filetype: str = "md" # Extension of exported text files
clear_output: bool = False # Remove eventual output dir before running
clear_log: bool = False # Clear log before every run instead of appending to
logfile: str = "spip2md.log" # File where logs will be written, relative to wd
loglevel: str = "WARNING" # Minimum criticity of logs written in logfile
remove_html: bool = True # Should spip2md remove every HTML tags
max_articles_export: int = 1000 # TODO reimplement
max_sections_export: int = 500 # TODO reimplement
def __init__(self, config_file: Optional[str] = None):
if config_file is not None:

View File

@ -74,8 +74,8 @@ class NormalizedSection(SpipNormalized, SpipRubriques):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.obj_id = self.id_rubrique
self.depth = self.profondeur
self.obj_id = self.id_rubrique.cast(as_type="int")
self.depth = self.profondeur.cast(as_type="int")
class NormalizedArticle(SpipNormalized, SpipArticles):
@ -84,7 +84,7 @@ class NormalizedArticle(SpipNormalized, SpipArticles):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.obj_id = self.id_article
self.obj_id = self.id_article.cast(as_type="int")
class NormalizedDocument(SpipNormalized, SpipDocuments):
@ -93,7 +93,7 @@ class NormalizedDocument(SpipNormalized, SpipDocuments):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.obj_id = self.id_document
self.obj_id = self.id_document.cast(as_type="int")
class WritableObject(SpipNormalized):
@ -515,7 +515,7 @@ class Section(RedactionalObject, NormalizedSection):
def articles(self) -> list[Article]:
return (
Article.select()
.where(Article.id_rubrique == self.obj_id)
.where((Article.id_rubrique == self.obj_id) & (Article.lang == self.lang))
.order_by(Article.date.desc())
# .limit(limit)
)
@ -551,7 +551,7 @@ class Section(RedactionalObject, NormalizedSection):
output.append(write_loop(documents))
# Get all child section of this section
child_sections: list[Section] = (
child_sections: tuple[Section, ...] = (
Section.select()
.where(Section.id_parent == self.obj_id)
.order_by(Section.date.desc())

View File

@ -4,8 +4,6 @@ from os import makedirs, remove
from os.path import isfile
from shutil import rmtree
from peewee import ModelSelect
from spip2md.config import CFG
from spip2md.extended_models import Section
from spip2md.spip_models import DB
@ -29,7 +27,7 @@ as database user {esc(BOLD)}{CFG.db_user}{esc()}
"""
)
# Get all sections of parentID ROOTID
child_sections: list[Section] = (
child_sections: tuple[Section, ...] = (
Section.select()
.where(Section.id_parent == ROOTID)
.order_by(Section.date.desc())
@ -88,7 +86,7 @@ def clear_output() -> None:
makedirs(CFG.output_dir, exist_ok=True)
# To execute when script is directly executed as a script
# When directly executed as a script
def cli():
# def cli(*addargv: str):
# import sys
@ -101,8 +99,8 @@ def cli():
# else:
# sections_export = CFG.max_sections_export
init_logging()
clear_output()
init_logging() # Initialize logging and logfile
clear_output() # Eventually remove already existing output dir
# Connect to the MySQL database with Peewee ORM
DB.init(CFG.db, host=CFG.db_host, user=CFG.db_user, password=CFG.db_pass)