Compare commits
7 Commits
virtual-tr
...
main
Author | SHA1 | Date | |
---|---|---|---|
|
55e19d4825 | ||
|
b245ff75c5 | ||
|
8769185b8d | ||
|
164a1e8228 | ||
|
3cc90f6dc2 | ||
|
00f3ed6cf9 | ||
|
3ab94aa388 |
@ -106,6 +106,9 @@ prepend_h1: false # Add title of articles as Markdown h1, looks better on certai
|
|||||||
# dest: title
|
# dest: title
|
||||||
# repr: "{} _" # (this is the default repr)
|
# repr: "{} _" # (this is the default repr)
|
||||||
move_fields: []
|
move_fields: []
|
||||||
|
# Some taxonomies (Spip Mots types) to not export, typically specific to Spip functions
|
||||||
|
ignore_taxonomies: ["Gestion du site", "Gestion des articles", "Mise en page"]
|
||||||
|
rename_taxonomies: { equipes: "tag-equipes" } # Rename taxonomies (prenvent conflict)
|
||||||
|
|
||||||
# Ignored data settings
|
# Ignored data settings
|
||||||
export_drafts: true # Should we export drafts
|
export_drafts: true # Should we export drafts
|
||||||
|
13
poetry.lock
generated
13
poetry.lock
generated
@ -103,17 +103,6 @@ files = [
|
|||||||
{file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"},
|
{file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "typing-extensions"
|
|
||||||
version = "4.6.3"
|
|
||||||
description = "Backported and Experimental Type Hints for Python 3.7+"
|
|
||||||
optional = false
|
|
||||||
python-versions = ">=3.7"
|
|
||||||
files = [
|
|
||||||
{file = "typing_extensions-4.6.3-py3-none-any.whl", hash = "sha256:88a4153d8505aabbb4e13aacb7c486c2b4a33ca3b3f807914a9b4c844c471c26"},
|
|
||||||
{file = "typing_extensions-4.6.3.tar.gz", hash = "sha256:d91d5919357fe7f681a9f2b5b4cb2a5f1ef0a1e9f59c4d8ff0d3491e05c0ffd5"},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unidecode"
|
name = "unidecode"
|
||||||
version = "1.3.6"
|
version = "1.3.6"
|
||||||
@ -128,4 +117,4 @@ files = [
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.9"
|
python-versions = "^3.9"
|
||||||
content-hash = "aa57e8d6b431eddf09f5e5fe90196a965781c9b020aeb0d8ac3b7c5bd34a51a4"
|
content-hash = "b2f6a06875c1c40404e891bf9765fab11ecf7fbf04a486962c27f71b3084857a"
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "spip2md"
|
name = "spip2md"
|
||||||
version = "0.1.0"
|
version = "0.1.1"
|
||||||
description = "Generate a static website with plain Markdown+YAML files from a SPIP CMS database"
|
description = "Generate a static website with plain Markdown+YAML files from a SPIP CMS database"
|
||||||
license = "GPL-2.0"
|
license = "GPL-2.0"
|
||||||
|
|
||||||
@ -27,7 +27,6 @@ pyyaml = "^6.0"
|
|||||||
python-slugify = {extras = ["unidecode"], version = "^8.0.1"}
|
python-slugify = {extras = ["unidecode"], version = "^8.0.1"}
|
||||||
pymysql = "^1.0.3"
|
pymysql = "^1.0.3"
|
||||||
peewee = "^3.16.2"
|
peewee = "^3.16.2"
|
||||||
typing-extensions = "^4.6.3"
|
|
||||||
|
|
||||||
[tool.poetry.scripts]
|
[tool.poetry.scripts]
|
||||||
spip2md = "spip2md.lib:cli"
|
spip2md = "spip2md.lib:cli"
|
||||||
|
@ -1,31 +0,0 @@
|
|||||||
"""
|
|
||||||
This file is part of spip2md.
|
|
||||||
Copyright (C) 2023 LCPQ/Guilhem Fauré
|
|
||||||
|
|
||||||
spip2md is free software: you can redistribute it and/or modify it under the terms of
|
|
||||||
the GNU General Public License version 2 as published by the Free Software Foundation.
|
|
||||||
|
|
||||||
spip2md is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
|
||||||
without even the implied warranty of MERCHANTABILITY or
|
|
||||||
FITNESS FOR A PARTICULAR PURPOSE.
|
|
||||||
See the GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along with spip2md.
|
|
||||||
If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
|
|
||||||
|
|
||||||
This file main purpose is to initialise the logging infrastructure of Python.
|
|
||||||
As the __init__.py file, this file is executed whenever the package is imported.
|
|
||||||
"""
|
|
||||||
# pyright: strict
|
|
||||||
import logging
|
|
||||||
from os.path import isfile
|
|
||||||
|
|
||||||
LOGFILE: str = "log-spip2md.log" # File where logs will be written, relative to wd
|
|
||||||
LOGLEVEL: str = "WARNING" # Minimum criticity of logs written in logfile
|
|
||||||
# Configure logging
|
|
||||||
# __import__("os").remove(LOGFILE) # Clear log ?
|
|
||||||
if isfile(LOGFILE): # Break 2 lines before new log if there’s already one
|
|
||||||
with open(LOGFILE, "a") as f:
|
|
||||||
f.write("\n\n")
|
|
||||||
logging.basicConfig(encoding="utf-8", filename=LOGFILE, level=LOGLEVEL) # Init
|
|
@ -1,7 +1,7 @@
|
|||||||
#!python
|
#!python
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from spip2md.cli import main
|
from spip2md.lib import cli
|
||||||
|
|
||||||
# if __name__ == '__main__': # It IS main
|
# if __name__ == '__main__': # It IS main
|
||||||
sys.exit(main(*sys.argv))
|
sys.exit(cli())
|
||||||
|
@ -20,13 +20,43 @@ from typing import Optional
|
|||||||
|
|
||||||
from yaml import Loader, load
|
from yaml import Loader, load
|
||||||
|
|
||||||
|
NAME: str = "spip2md" # Name of program, notably used in logs
|
||||||
|
|
||||||
|
|
||||||
|
# Searches for a configuration file from all CLI args and in standard locations
|
||||||
|
# & return his path if found
|
||||||
|
def config(*start_locations: str) -> Optional[str]:
|
||||||
|
# Search for config files in CLI arguments and function params first
|
||||||
|
argv = __import__("sys").argv
|
||||||
|
config_locations: list[str] = argv[1:] + list(start_locations)
|
||||||
|
|
||||||
|
if "XDG_CONFIG_HOME" in environ:
|
||||||
|
config_locations += [
|
||||||
|
environ["XDG_CONFIG_HOME"] + "/spip2md.yml",
|
||||||
|
environ["XDG_CONFIG_HOME"] + "/spip2md.yaml",
|
||||||
|
]
|
||||||
|
|
||||||
|
if "HOME" in environ:
|
||||||
|
config_locations += [
|
||||||
|
environ["HOME"] + "/.config/spip2md.yml",
|
||||||
|
environ["HOME"] + "/.config/spip2md.yaml",
|
||||||
|
environ["HOME"] + "/spip2md.yml",
|
||||||
|
environ["HOME"] + "/spip2md.yaml",
|
||||||
|
]
|
||||||
|
|
||||||
|
# Search in working directory in last resort
|
||||||
|
config_locations += [
|
||||||
|
"/spip2md.yml",
|
||||||
|
"/spip2md.yaml",
|
||||||
|
]
|
||||||
|
|
||||||
|
for path in config_locations:
|
||||||
|
if isfile(path):
|
||||||
|
return path
|
||||||
|
|
||||||
|
|
||||||
# Global configuration object
|
# Global configuration object
|
||||||
class Configuration:
|
class Configuration:
|
||||||
# config_file: Optional[str] = None # Location of the config file
|
|
||||||
|
|
||||||
name: str = "spip2md" # Name of program, notably used in logs
|
|
||||||
|
|
||||||
db: str = "spip" # DB name
|
db: str = "spip" # DB name
|
||||||
db_host: str = "localhost" # Where is the DB
|
db_host: str = "localhost" # Where is the DB
|
||||||
db_user: str = "spip" # A DB user with read access to SPIP database
|
db_user: str = "spip" # A DB user with read access to SPIP database
|
||||||
@ -42,55 +72,23 @@ class Configuration:
|
|||||||
export_drafts: bool = True # Should we export drafts as draft:true articles
|
export_drafts: bool = True # Should we export drafts as draft:true articles
|
||||||
export_empty: bool = True # Should we export empty articles
|
export_empty: bool = True # Should we export empty articles
|
||||||
remove_html: bool = True # Should spip2md remove every HTML tags
|
remove_html: bool = True # Should spip2md remove every HTML tags
|
||||||
|
ignore_taxonomies = ("Gestion du site", "Gestion des articles", "Mise en page")
|
||||||
|
rename_taxonomies: dict[str, str] = {"equipes": "tag-equipes"}
|
||||||
metadata_markup: bool = False # Should spip2md keep the markup in metadata fields
|
metadata_markup: bool = False # Should spip2md keep the markup in metadata fields
|
||||||
title_max_length: int = 40 # Maximum length of a single title for directory names
|
title_max_length: int = 40 # Maximum length of a single title for directory names
|
||||||
unknown_char_replacement: str = "??" # Replaces unknown characters
|
unknown_char_replacement: str = "??" # Replaces unknown characters
|
||||||
clear_log: bool = True # Clear log before every run instead of appending to
|
clear_log: bool = True # Clear log before every run instead of appending to
|
||||||
clear_output: bool = True # Remove eventual output dir before running
|
clear_output: bool = True # Remove eventual output dir before running
|
||||||
ignore_patterns: list[str] = [] # Ignore objects of which title match
|
ignore_patterns: list[str] = [] # Ignore objects of which title match
|
||||||
|
logfile: str = "log-spip2md.log" # File where logs will be written, relative to wd
|
||||||
|
loglevel: str = "WARNING" # Minimum criticity of logs written in logfile
|
||||||
export_filetype: str = "md" # Extension of exported text files
|
export_filetype: str = "md" # Extension of exported text files
|
||||||
|
debug_meta: bool = False # Include more metadata from SPIP DB in frontmatters
|
||||||
|
|
||||||
debug: bool = False # Enable debug mode
|
def __init__(self, config_file: Optional[str] = None):
|
||||||
|
if config_file is not None:
|
||||||
# Searches for a configuration file from standard locations or params
|
|
||||||
def _find_config_file(self, *start_locations: str) -> str:
|
|
||||||
# Search for config files in function params first
|
|
||||||
config_locations: list[str] = list(start_locations)
|
|
||||||
|
|
||||||
if "XDG_CONFIG_HOME" in environ:
|
|
||||||
config_locations += [
|
|
||||||
environ["XDG_CONFIG_HOME"] + "/spip2md.yml",
|
|
||||||
environ["XDG_CONFIG_HOME"] + "/spip2md.yaml",
|
|
||||||
]
|
|
||||||
|
|
||||||
if "HOME" in environ:
|
|
||||||
config_locations += [
|
|
||||||
environ["HOME"] + "/.config/spip2md.yml",
|
|
||||||
environ["HOME"] + "/.config/spip2md.yaml",
|
|
||||||
environ["HOME"] + "/spip2md.yml",
|
|
||||||
environ["HOME"] + "/spip2md.yaml",
|
|
||||||
]
|
|
||||||
|
|
||||||
# Search in working directory in last resort
|
|
||||||
config_locations += [
|
|
||||||
"/spip2md.yml",
|
|
||||||
"/spip2md.yaml",
|
|
||||||
]
|
|
||||||
|
|
||||||
# Return the first path that actually exists
|
|
||||||
for path in config_locations:
|
|
||||||
if isfile(path):
|
|
||||||
# self.config_file = path
|
|
||||||
return path
|
|
||||||
# If not found, raise error
|
|
||||||
raise FileNotFoundError
|
|
||||||
|
|
||||||
def __init__(self, *argv: str):
|
|
||||||
try:
|
|
||||||
# Read config from config file
|
# Read config from config file
|
||||||
with open(self._find_config_file(*argv[1:])) as f:
|
with open(config_file) as f:
|
||||||
# Tell user about config
|
|
||||||
print(f"Read configuration file from {f.name}")
|
|
||||||
config = load(f.read(), Loader=Loader)
|
config = load(f.read(), Loader=Loader)
|
||||||
# Assign configuration for each attribute in config file
|
# Assign configuration for each attribute in config file
|
||||||
for attr in config:
|
for attr in config:
|
||||||
@ -102,5 +100,6 @@ class Configuration:
|
|||||||
setattr(self, attr, directory)
|
setattr(self, attr, directory)
|
||||||
else:
|
else:
|
||||||
setattr(self, attr, config[attr])
|
setattr(self, attr, config[attr])
|
||||||
except FileNotFoundError:
|
|
||||||
print("No configuration file found, using defaults")
|
|
||||||
|
CFG = Configuration(config())
|
||||||
|
@ -1,258 +0,0 @@
|
|||||||
"""
|
|
||||||
This file is part of spip2md.
|
|
||||||
Copyright (C) 2023 LCPQ/Guilhem Fauré
|
|
||||||
|
|
||||||
spip2md is free software: you can redistribute it and/or modify it under the terms of
|
|
||||||
the GNU General Public License version 2 as published by the Free Software Foundation.
|
|
||||||
|
|
||||||
spip2md is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
|
||||||
without even the implied warranty of MERCHANTABILITY or
|
|
||||||
FITNESS FOR A PARTICULAR PURPOSE.
|
|
||||||
See the GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along with spip2md.
|
|
||||||
If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
|
|
||||||
|
|
||||||
This file contains the core classes of spip2md that models internal objects of spip
|
|
||||||
and methods to convert them to Markdown + YAML, static site structure
|
|
||||||
"""
|
|
||||||
import logging
|
|
||||||
from os.path import basename, splitext
|
|
||||||
from typing_extensions import Self
|
|
||||||
|
|
||||||
from slugify import slugify
|
|
||||||
|
|
||||||
from spip2md.config import Configuration
|
|
||||||
from spip2md.spip_models import (
|
|
||||||
SpipArticles,
|
|
||||||
SpipAuteurs,
|
|
||||||
SpipAuteursLiens,
|
|
||||||
SpipDocuments,
|
|
||||||
SpipDocumentsLiens,
|
|
||||||
SpipMots,
|
|
||||||
SpipMotsLiens,
|
|
||||||
SpipRubriques,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class ConvertableDocument:
|
|
||||||
_log_c: logging.Logger # Logger for conversion operations
|
|
||||||
_cfg: Configuration # Global configuration
|
|
||||||
_spip_obj: SpipDocuments # The Spip Article this is representing
|
|
||||||
# Converted fields
|
|
||||||
_src: str # URL
|
|
||||||
_slug: str = "" # URL
|
|
||||||
_id: int
|
|
||||||
|
|
||||||
class Meta:
|
|
||||||
table_name: str = "spip_document" # Define the name of the Spip DB table
|
|
||||||
|
|
||||||
def __init__(self, spip_obj: SpipDocuments, cfg: Configuration):
|
|
||||||
self._log_c = logging.getLogger(cfg.name + ".convert.document")
|
|
||||||
self._cfg = cfg
|
|
||||||
self._spip_obj = spip_obj
|
|
||||||
self._id = int(spip_obj.id_document) # type: ignore
|
|
||||||
# Define source name of this file
|
|
||||||
self._src = cfg.data_dir + spip_obj.fichier
|
|
||||||
# Define destination name of this file
|
|
||||||
name, filetype = splitext(basename(str(spip_obj.fichier)))
|
|
||||||
prepend: str = str(spip_obj.id_document) + "-" if self._cfg.prepend_id else ""
|
|
||||||
self._slug = slugify(prepend + name, max_length=cfg.title_max_length) + filetype
|
|
||||||
|
|
||||||
|
|
||||||
class ConvertableRedactional:
|
|
||||||
_log_c: logging.Logger # Logger for conversion operations
|
|
||||||
_cfg: Configuration # Global configuration
|
|
||||||
_spip_obj: SpipArticles | SpipRubriques # The Spip Article this is representing
|
|
||||||
_depth: int # Depth
|
|
||||||
_children: dict[tuple[str, int], ConvertableDocument] = {} # Children
|
|
||||||
_id: int
|
|
||||||
_lang: str
|
|
||||||
_authors: tuple[SpipAuteurs, ...]
|
|
||||||
_tags: tuple[SpipMots, ...]
|
|
||||||
|
|
||||||
# Initialize documents related to self
|
|
||||||
def documents(
|
|
||||||
self, limit: int = 10**3
|
|
||||||
) -> dict[tuple[str, int], ConvertableDocument]:
|
|
||||||
self._log_c.debug(
|
|
||||||
"Initialize documents.\n"
|
|
||||||
+ f"Section: {self._spip_obj.titre}, Depth : {self._depth}"
|
|
||||||
)
|
|
||||||
documents = [
|
|
||||||
ConvertableDocument(doc, self._cfg)
|
|
||||||
for doc in (
|
|
||||||
SpipDocuments.select()
|
|
||||||
.join(
|
|
||||||
SpipDocumentsLiens,
|
|
||||||
on=(SpipDocuments.id_document == SpipDocumentsLiens.id_document),
|
|
||||||
)
|
|
||||||
.where(SpipDocumentsLiens.id_objet == self._id)
|
|
||||||
.limit(limit)
|
|
||||||
)
|
|
||||||
]
|
|
||||||
# Store them mutably
|
|
||||||
return {("document", d._id): d for d in documents}
|
|
||||||
|
|
||||||
# Initialize self authors
|
|
||||||
def authors(self) -> tuple[SpipAuteurs, ...]:
|
|
||||||
self._log_c.debug("Initialize authors")
|
|
||||||
return (
|
|
||||||
SpipAuteurs.select()
|
|
||||||
.join(
|
|
||||||
SpipAuteursLiens,
|
|
||||||
on=(SpipAuteurs.id_auteur == SpipAuteursLiens.id_auteur),
|
|
||||||
)
|
|
||||||
.where(SpipAuteursLiens.id_objet == self._id)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Initialize self tags
|
|
||||||
def tags(self) -> tuple[SpipMots]:
|
|
||||||
self._log_c.debug("Initialize tags")
|
|
||||||
return (
|
|
||||||
SpipMots.select()
|
|
||||||
.join(
|
|
||||||
SpipMotsLiens,
|
|
||||||
on=(SpipMots.id_mot == SpipMotsLiens.id_mot),
|
|
||||||
)
|
|
||||||
.where(SpipMotsLiens.id_objet == self._id)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class ConvertableArticle(ConvertableRedactional):
|
|
||||||
_fileprefix: str = "index"
|
|
||||||
# Converted fields
|
|
||||||
_surtitle: str # Content
|
|
||||||
_title: str # Content
|
|
||||||
_subtitle: str # Content
|
|
||||||
_description: str # Content
|
|
||||||
_caption: str # Content
|
|
||||||
_extra: str # Content
|
|
||||||
_text: str # Content
|
|
||||||
_slug: str # URL
|
|
||||||
|
|
||||||
class Meta:
|
|
||||||
table_name: str = "spip_articles" # Define the name of the Spip DB table
|
|
||||||
|
|
||||||
def __init__(self, spip_obj: SpipArticles, cfg: Configuration, depth: int):
|
|
||||||
self._log_c = logging.getLogger(cfg.name + ".convert.article")
|
|
||||||
self._cfg = cfg
|
|
||||||
self._spip_obj = spip_obj
|
|
||||||
self._id = int(spip_obj.id_article) # type: ignore # Peewee types not defined
|
|
||||||
self._lang = str(spip_obj.lang)
|
|
||||||
self._depth = depth
|
|
||||||
self._draft = spip_obj.statut != "publie"
|
|
||||||
self._children |= self.documents() # Retreive documents & add them to the index
|
|
||||||
|
|
||||||
# Return children and itself in order to be indexed by the parent
|
|
||||||
def index(
|
|
||||||
self,
|
|
||||||
) -> dict[tuple[str, int], tuple[str, int]]:
|
|
||||||
return {child_key: ("article", self._id) for child_key in self._children}
|
|
||||||
|
|
||||||
|
|
||||||
# Define Section as an Article that can contain other Articles or Sections
|
|
||||||
class ConvertableSection(ConvertableRedactional):
|
|
||||||
_fileprefix: str = "_index" # Prefix of written Markdown files
|
|
||||||
# sub-sections, documents, articles
|
|
||||||
_children: dict[
|
|
||||||
tuple[str, int], "ConvertableSection | ConvertableArticle | ConvertableDocument"
|
|
||||||
] = {}
|
|
||||||
# Routing table to objects
|
|
||||||
_index: dict[tuple[str, int], tuple[str, int]] = {}
|
|
||||||
|
|
||||||
class Meta:
|
|
||||||
table_name: str = "spip_rubriques" # Define the name of the Spip DB table
|
|
||||||
|
|
||||||
# Get articles of this section
|
|
||||||
def articles(self, limit: int = 10**6):
|
|
||||||
self._log_c.debug(
|
|
||||||
"Initialize articles.\n"
|
|
||||||
+ f"Section: {self._spip_obj.titre}, Depth : {self._depth}"
|
|
||||||
)
|
|
||||||
articles = [
|
|
||||||
ConvertableArticle(art, self._cfg, self._depth)
|
|
||||||
for art in (
|
|
||||||
SpipArticles.select()
|
|
||||||
.where(SpipArticles.id_rubrique == self._id)
|
|
||||||
.order_by(SpipArticles.date.desc())
|
|
||||||
.limit(limit)
|
|
||||||
)
|
|
||||||
]
|
|
||||||
# Add these articles and their children to self index
|
|
||||||
for article in articles:
|
|
||||||
self._index |= article.index()
|
|
||||||
# Store them mutably
|
|
||||||
return {("article", art._id): art for art in articles}
|
|
||||||
|
|
||||||
# Get subsections of this section
|
|
||||||
def sections(self, limit: int = 10**6):
|
|
||||||
self._log_c.debug(
|
|
||||||
"Initialize subsections of\n"
|
|
||||||
+ f"section {self._spip_obj.titre} of depth {self._depth}"
|
|
||||||
)
|
|
||||||
sections = [
|
|
||||||
ConvertableSection(sec, self._cfg, self._depth)
|
|
||||||
for sec in (
|
|
||||||
SpipRubriques.select()
|
|
||||||
.where(SpipRubriques.id_parent == self._id)
|
|
||||||
.order_by(SpipRubriques.date.desc())
|
|
||||||
.limit(limit)
|
|
||||||
)
|
|
||||||
]
|
|
||||||
# Add these sections’s indexes to self index, replacing next hop with section
|
|
||||||
for section in sections:
|
|
||||||
self._index |= {
|
|
||||||
obj_key: ("section", section._id) for obj_key in section._index
|
|
||||||
}
|
|
||||||
# Store them mutably
|
|
||||||
return {("section", sec._id): sec for sec in sections}
|
|
||||||
|
|
||||||
def __init__(self, spip_obj: SpipRubriques, cfg: Configuration, parent_depth: int):
|
|
||||||
self._log_c = logging.getLogger(cfg.name + ".convert.section")
|
|
||||||
self._cfg = cfg
|
|
||||||
self._spip_obj = spip_obj
|
|
||||||
self._id = int(spip_obj.id_rubrique) # type: ignore
|
|
||||||
self._lang = str(spip_obj.lang)
|
|
||||||
self._depth = parent_depth + 1
|
|
||||||
self._children |= self.documents()
|
|
||||||
self._children |= self.articles()
|
|
||||||
self._children |= self.sections()
|
|
||||||
|
|
||||||
|
|
||||||
# The "root" element representing the whole converted site
|
|
||||||
class ConvertableSite:
|
|
||||||
_log_c: logging.Logger # Logger for conversion operations
|
|
||||||
_cfg: Configuration # Global configuration
|
|
||||||
_children: dict[tuple[str, int], ConvertableSection] = {} # Root sections
|
|
||||||
_index: dict[tuple[str, int], tuple[str, int]] = {} # Routing table to objects
|
|
||||||
|
|
||||||
_id: int = 0 # Parent ID of root sections
|
|
||||||
_depth: int = 0 # Depth
|
|
||||||
|
|
||||||
def sections(self) -> dict[tuple[str, int], ConvertableSection]:
|
|
||||||
self._log_c.debug("Initialize ROOT sections")
|
|
||||||
# Get all sections of parentID root_id
|
|
||||||
sections = [
|
|
||||||
ConvertableSection(sec, self._cfg, self._depth)
|
|
||||||
for sec in (
|
|
||||||
SpipRubriques.select()
|
|
||||||
.where(SpipRubriques.id_parent == self._id)
|
|
||||||
.order_by(SpipRubriques.date.desc())
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
||||||
# Add these sections’s indexes to self index, replacing next hop with section
|
|
||||||
# do this while outputting it as the children
|
|
||||||
def sec_to_index(section: ConvertableSection):
|
|
||||||
for obj_key in section._index:
|
|
||||||
self._index[obj_key] = ("section", section._id)
|
|
||||||
return ("section", section._id)
|
|
||||||
|
|
||||||
return {sec_to_index(subsection): subsection for subsection in sections}
|
|
||||||
|
|
||||||
def __init__(self, cfg: Configuration) -> None:
|
|
||||||
self._log_c = logging.getLogger(cfg.name + ".convert.site")
|
|
||||||
self._cfg = cfg
|
|
||||||
self._children |= self.sections()
|
|
934
spip2md/extended_models.py
Normal file
934
spip2md/extended_models.py
Normal file
@ -0,0 +1,934 @@
|
|||||||
|
"""
|
||||||
|
This file is part of spip2md.
|
||||||
|
Copyright (C) 2023 LCPQ/Guilhem Fauré
|
||||||
|
|
||||||
|
spip2md is free software: you can redistribute it and/or modify it under the terms of
|
||||||
|
the GNU General Public License version 2 as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
spip2md is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||||
|
without even the implied warranty of MERCHANTABILITY or
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
See the GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along with spip2md.
|
||||||
|
If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
from os import listdir, mkdir
|
||||||
|
from os.path import basename, isfile, splitext
|
||||||
|
from re import I, Match, Pattern, finditer, match, search
|
||||||
|
from re import error as re_error
|
||||||
|
from shutil import copyfile
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
from peewee import (
|
||||||
|
BigAutoField,
|
||||||
|
BigIntegerField,
|
||||||
|
DateTimeField,
|
||||||
|
DoesNotExist,
|
||||||
|
)
|
||||||
|
from slugify import slugify
|
||||||
|
from yaml import dump
|
||||||
|
|
||||||
|
from spip2md.config import CFG, NAME
|
||||||
|
from spip2md.regexmaps import (
|
||||||
|
ARTICLE_LINK,
|
||||||
|
BLOAT,
|
||||||
|
CONFIG_LANGS,
|
||||||
|
DOCUMENT_LINK,
|
||||||
|
HTMLTAGS,
|
||||||
|
IMAGE_LINK,
|
||||||
|
ISO_UTF,
|
||||||
|
MULTILANG_BLOCK,
|
||||||
|
SECTION_LINK,
|
||||||
|
SPECIAL_OUTPUT,
|
||||||
|
SPIP_MARKDOWN,
|
||||||
|
UNKNOWN_ISO,
|
||||||
|
WARNING_OUTPUT,
|
||||||
|
)
|
||||||
|
from spip2md.spip_models import (
|
||||||
|
SpipArticles,
|
||||||
|
SpipAuteurs,
|
||||||
|
SpipAuteursLiens,
|
||||||
|
SpipDocuments,
|
||||||
|
SpipDocumentsLiens,
|
||||||
|
SpipMots,
|
||||||
|
SpipMotsLiens,
|
||||||
|
SpipRubriques,
|
||||||
|
)
|
||||||
|
from spip2md.style import BOLD, CYAN, GREEN, WARNING_STYLE, YELLOW, esc
|
||||||
|
|
||||||
|
DeepDict = dict[str, "list[DeepDict] | list[str] | str"]
|
||||||
|
|
||||||
|
# Define logger for this file’s logs
|
||||||
|
LOG = logging.getLogger(NAME + ".models")
|
||||||
|
|
||||||
|
# Define type that images can have
|
||||||
|
IMG_TYPES = ("jpg", "png", "jpeg", "gif", "webp", "ico")
|
||||||
|
|
||||||
|
|
||||||
|
class SpipWritable:
|
||||||
|
# From SPIP database
|
||||||
|
texte: str
|
||||||
|
lang: str
|
||||||
|
titre: str
|
||||||
|
descriptif: str
|
||||||
|
statut: str
|
||||||
|
profondeur: int
|
||||||
|
# Converted fields
|
||||||
|
_storage_title: str # Title with which directories names are built
|
||||||
|
_draft: bool
|
||||||
|
# Additional fields
|
||||||
|
_id: BigAutoField | int = 0 # same ID attribute name for all objects
|
||||||
|
_depth: int # Equals `profondeur` for sections
|
||||||
|
_fileprefix: str # String to prepend to written files
|
||||||
|
_storage_parentdir: str # Path from output dir to direct parent
|
||||||
|
_style: tuple[int, ...] # _styles to apply to some elements of printed output
|
||||||
|
_storage_title_append: int = 0 # Append a number to storage title if > 0
|
||||||
|
|
||||||
|
# Apply a mapping from regex maps
|
||||||
|
@staticmethod
|
||||||
|
def apply_mapping(text: str, mapping: tuple, keep_markup: bool = True) -> str:
|
||||||
|
if type(mapping) == tuple and len(mapping) > 0:
|
||||||
|
if type(mapping[0]) == tuple and len(mapping[0]) > 0:
|
||||||
|
if type(mapping[0][0]) == Pattern: # Mostly for syntax conversion
|
||||||
|
for old, new in mapping:
|
||||||
|
if keep_markup:
|
||||||
|
text = old.sub(new, text)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
text = old.sub(r"\1", text)
|
||||||
|
except re_error:
|
||||||
|
text = old.sub("", text)
|
||||||
|
else:
|
||||||
|
for old, new in mapping: # Mostly for broken encoding
|
||||||
|
text = text.replace(old, new)
|
||||||
|
elif type(mapping[0]) == Pattern:
|
||||||
|
for old in mapping:
|
||||||
|
text = old.sub("", text)
|
||||||
|
else:
|
||||||
|
for old in mapping:
|
||||||
|
text = text.replace(old, "")
|
||||||
|
return text
|
||||||
|
|
||||||
|
# Warn about unknown chars & replace them with config defined replacement
|
||||||
|
def warn_unknown(self, text: str, unknown_mapping: tuple) -> str:
|
||||||
|
# Return unknown char surrounded by context_length chars
|
||||||
|
def unknown_chars_context(text: str, char: str, context_len: int = 24) -> str:
|
||||||
|
context: str = r".{0," + str(context_len) + r"}"
|
||||||
|
m = search(
|
||||||
|
context + r"(?=" + char + r")" + char + context,
|
||||||
|
text,
|
||||||
|
)
|
||||||
|
if m is not None:
|
||||||
|
return m.group()
|
||||||
|
else:
|
||||||
|
return char
|
||||||
|
|
||||||
|
for char in unknown_mapping:
|
||||||
|
lastend: int = 0
|
||||||
|
for m in finditer("(" + char + ")+", text):
|
||||||
|
context: str = unknown_chars_context(text[lastend:], char)
|
||||||
|
LOG.warn(
|
||||||
|
f"Unknown char {char} found in {self.titre[:40]} at: {context}"
|
||||||
|
)
|
||||||
|
if CFG.unknown_char_replacement is not None:
|
||||||
|
LOG.warn(
|
||||||
|
f"Replacing {m.group()} with {CFG.unknown_char_replacement}"
|
||||||
|
)
|
||||||
|
text = text.replace(m.group(), CFG.unknown_char_replacement, 1)
|
||||||
|
lastend = m.end()
|
||||||
|
return text
|
||||||
|
|
||||||
|
# Apply needed methods on text fields
|
||||||
|
def convert_field(self, field: Optional[str], keep_markup: bool = True) -> str:
|
||||||
|
if field is None:
|
||||||
|
return ""
|
||||||
|
if len(field) == 0:
|
||||||
|
return ""
|
||||||
|
# Convert SPIP syntax to Markdown
|
||||||
|
field = self.apply_mapping(field, SPIP_MARKDOWN, keep_markup)
|
||||||
|
# Remove useless text
|
||||||
|
field = self.apply_mapping(field, BLOAT)
|
||||||
|
# Convert broken ISO encoding to UTF
|
||||||
|
field = self.apply_mapping(field, ISO_UTF)
|
||||||
|
if CFG.remove_html:
|
||||||
|
# Delete remaining HTML tags in body WARNING
|
||||||
|
field = self.apply_mapping(field, HTMLTAGS)
|
||||||
|
# Warn about unknown chars
|
||||||
|
field = self.warn_unknown(field, UNKNOWN_ISO)
|
||||||
|
return field.strip() # Strip whitespaces around text
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
# Initialize converted fields beginning with underscore
|
||||||
|
self._description: str = self.convert_field(self.descriptif)
|
||||||
|
self._draft = self.statut != "publie"
|
||||||
|
|
||||||
|
# Apply post-init conversions and cancel the export if self not of the right lang
|
||||||
|
def convert(self) -> None:
|
||||||
|
self._storage_title = self.convert_field(self.titre)
|
||||||
|
if not CFG.export_drafts and self._draft:
|
||||||
|
raise DontExportDraftError(f"{self.titre} is a draft, cancelling export")
|
||||||
|
|
||||||
|
def dest_directory(self) -> str:
|
||||||
|
raise NotImplementedError("Subclasses need to implement directory()")
|
||||||
|
|
||||||
|
def dest_filename(self, prepend: str = "", append: str = "") -> str:
|
||||||
|
raise NotImplementedError(
|
||||||
|
f"Subclasses need to implement dest_filename(), params:{prepend}{append}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def dest_path(self) -> str:
|
||||||
|
return self.dest_directory() + self.dest_filename()
|
||||||
|
|
||||||
|
# Print one or more line(s) in which special elements are stylized
|
||||||
|
def style_print(
|
||||||
|
self, string: str, indent: Optional[str] = " ", end: str = "\n"
|
||||||
|
) -> str:
|
||||||
|
stylized: str = string
|
||||||
|
for o in SPECIAL_OUTPUT:
|
||||||
|
stylized = o.sub(esc(*self._style) + r"\1" + esc(), stylized)
|
||||||
|
for w in WARNING_OUTPUT:
|
||||||
|
stylized = w.sub(esc(*WARNING_STYLE) + r"\1" + esc(), stylized)
|
||||||
|
if indent is not None and len(indent) > 0:
|
||||||
|
stylized = indent * self._depth + stylized
|
||||||
|
print(stylized, end=end)
|
||||||
|
# Return the stylized string in case
|
||||||
|
return stylized
|
||||||
|
|
||||||
|
# Print the message telling what is going to be done
|
||||||
|
def begin_message(self, index: int, limit: int, step: int = 100) -> str:
|
||||||
|
# Output the remaining number of objects to export every step object
|
||||||
|
if index % step == 0 and limit > 0:
|
||||||
|
counter: str = f"Exporting {limit-index} level {self._depth}"
|
||||||
|
s: str = "s" if limit - index > 1 else ""
|
||||||
|
if hasattr(self, "lang"):
|
||||||
|
counter += f" {self.lang}"
|
||||||
|
counter += f" {type(self).__name__}{s}"
|
||||||
|
# Print the output as the program goes
|
||||||
|
self.style_print(counter)
|
||||||
|
# Output the counter & title of the object being exported
|
||||||
|
msg: str = f"{index + 1}. "
|
||||||
|
if len(self._storage_title) == 0:
|
||||||
|
msg += "EMPTY NAME"
|
||||||
|
else:
|
||||||
|
msg += self._storage_title
|
||||||
|
# Print the output as the program goes
|
||||||
|
# LOG.debug(f"Begin exporting {type(self).__name__} {output[-1]}")
|
||||||
|
self.style_print(msg, end="")
|
||||||
|
return msg
|
||||||
|
|
||||||
|
# Write object to output destination
|
||||||
|
def write(self) -> str:
|
||||||
|
raise NotImplementedError("Subclasses need to implement write()")
|
||||||
|
|
||||||
|
# Output information about file that was just exported
|
||||||
|
def end_message(self, message: str | Exception) -> str:
|
||||||
|
output: str = " -> "
|
||||||
|
if type(message) is FileNotFoundError:
|
||||||
|
output += "ERROR: NOT FOUND: "
|
||||||
|
elif type(message) is DoesNotExist:
|
||||||
|
output += "ERROR: NO DESTINATION DIR: "
|
||||||
|
elif type(message) is DontExportDraftError:
|
||||||
|
output += "ERROR: NOT EXPORTING DRAFT: "
|
||||||
|
elif type(message) is DontExportEmptyError:
|
||||||
|
output += "ERROR: NOT EXPORTING EMPTY: "
|
||||||
|
elif type(message) is not str:
|
||||||
|
output += "ERROR: UNKNOWN: "
|
||||||
|
# Print the output as the program goes
|
||||||
|
# LOG.debug(f"Finished exporting {type(self).__name__}: {message}")
|
||||||
|
self.style_print(output + str(message), indent=None)
|
||||||
|
return output + str(message)
|
||||||
|
|
||||||
|
# Perform all the write steps of this object
|
||||||
|
def write_all(
|
||||||
|
self,
|
||||||
|
parentdepth: int,
|
||||||
|
storage_parentdir: str,
|
||||||
|
index: int,
|
||||||
|
total: int,
|
||||||
|
parenturl: str,
|
||||||
|
) -> str:
|
||||||
|
LOG.debug(f"Writing {type(self).__name__} `{self._storage_title}`")
|
||||||
|
self._depth = parentdepth + 1
|
||||||
|
self._storage_parentdir = storage_parentdir
|
||||||
|
self._parenturl = parenturl
|
||||||
|
output: str = self.begin_message(index, total)
|
||||||
|
try:
|
||||||
|
output += self.end_message(self.write())
|
||||||
|
except (
|
||||||
|
LangNotFoundError,
|
||||||
|
DontExportDraftError,
|
||||||
|
DontExportEmptyError,
|
||||||
|
IgnoredPatternError,
|
||||||
|
FileNotFoundError,
|
||||||
|
) as err:
|
||||||
|
output += self.end_message(err)
|
||||||
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
class Document(SpipWritable, SpipDocuments):
    """A SPIP attached file, exported by copying it verbatim."""

    _fileprefix: str = ""
    _style = (BOLD, CYAN)  # Documents accent color is blue

    class Meta:
        table_name: str = "spip_documents"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._id = self.id_document

    def src_path(self, data_dir: Optional[str] = None) -> str:
        """Source path of this file, under `data_dir` (default CFG.data_dir)."""
        base = CFG.data_dir if data_dir is None else data_dir
        return base + self.fichier

    def dest_directory(self, prepend: str = "", append: str = "") -> str:
        """Destination directory of this document."""
        prefix = f"{self._id}-" if CFG.prepend_id else ""
        slug = slugify(prefix + self._storage_title, max_length=100)
        return self._storage_parentdir + prepend + slug + append

    def dest_filename(self, prepend: str = "", append: str = "") -> str:
        """Slugified destination filename, keeping the original extension."""
        name, filetype = splitext(basename(str(self.fichier)))
        return slugify(prepend + name, max_length=100) + append + filetype

    def write(self) -> str:
        """Copy the document from its SPIP location to the new location."""
        return copyfile(self.src_path(), self.dest_path())

    def write_all(
        self,
        parentdepth: int,
        storage_parentdir: str,
        index: int,
        total: int,
        forcedlang: Optional[str] = None,
        parenturl: str = "",
    ) -> str:
        """Convert then write this document (lang/url args are irrelevant)."""
        self.convert()  # Apply post-init conversions
        LOG.debug(
            f"Document {self._storage_title} doesn’t care about forcedlang {forcedlang}"
        )
        LOG.debug(
            f"Document {self._storage_title} doesn’t care about parenturl {parenturl}"
        )
        return super().write_all(
            parentdepth, storage_parentdir, index, total, parenturl
        )
|
||||||
|
|
||||||
|
|
||||||
|
class IgnoredPatternError(Exception):
    """Raised when an object's title matches a configured ignore pattern."""
|
||||||
|
|
||||||
|
|
||||||
|
class LangNotFoundError(Exception):
    """Raised when an object has no content in the requested language."""
|
||||||
|
|
||||||
|
|
||||||
|
class DontExportDraftError(Exception):
    """Raised to cancel exporting a draft when drafts are excluded."""
|
||||||
|
|
||||||
|
|
||||||
|
class DontExportEmptyError(Exception):
    """Raised to cancel exporting an empty object when those are excluded."""
|
||||||
|
|
||||||
|
|
||||||
|
class SpipRedactional(SpipWritable):
    # Shared DB fields of redactional objects (articles & sections)
    id_trad: BigIntegerField | BigAutoField | int
    id_rubrique: BigAutoField | int
    # date: DateTimeField | str
    date: DateTimeField
    maj: str
    id_secteur: BigIntegerField | int
    extra: str
    langue_choisie: str
    # Converted
    _text: str
    # NOTE(review): class-level mutable default, shared across instances
    # until convert_taxonomies() reassigns a per-instance dict — confirm no
    # code path reads it before convert() runs
    _taxonomies: dict[str, list[str]] = {}
    _url_title: str  # Title in metadata of articles
    _parenturl: str  # URL relative to lang to direct parent
    _static_img_path: Optional[str] = None  # Path to the static img of this article
|
||||||
|
|
||||||
|
# Get rid of other lang than forced in text and modify lang to forced if found
|
||||||
|
def translate_multi(
|
||||||
|
self, forced_lang: str, text: str, change_lang: bool = True
|
||||||
|
) -> str:
|
||||||
|
# LOG.debug(f"Translating <multi> blocks of `{self._url_title}`")
|
||||||
|
# for each <multi> blocks, keep only forced lang
|
||||||
|
lang: Optional[Match[str]] = None
|
||||||
|
for block in MULTILANG_BLOCK.finditer(text):
|
||||||
|
lang = CONFIG_LANGS[forced_lang].search(block.group(1))
|
||||||
|
if lang is not None:
|
||||||
|
# Log the translation
|
||||||
|
trans: str = lang.group(1)[:50].strip()
|
||||||
|
LOG.debug(
|
||||||
|
f"Keeping {forced_lang} translation of `{self._url_title}`: "
|
||||||
|
+ f"`{trans}`"
|
||||||
|
)
|
||||||
|
if change_lang:
|
||||||
|
self.lang = forced_lang # So write-all will not be cancelled
|
||||||
|
# Replace the mutli blocks with the text in the proper lang
|
||||||
|
text = text.replace(block.group(), lang.group(1))
|
||||||
|
else:
|
||||||
|
# Replace the mutli blocks with the text inside
|
||||||
|
text = text.replace(block.group(), block.group(1))
|
||||||
|
if lang is None:
|
||||||
|
LOG.debug(f"{forced_lang} not found in `{self._url_title}`")
|
||||||
|
return text
|
||||||
|
|
||||||
|
def replace_links(self, text: str) -> str:
|
||||||
|
class LinkMappings:
|
||||||
|
_link_types = IMAGE_LINK, DOCUMENT_LINK, SECTION_LINK, ARTICLE_LINK
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
self._type_cursor = 0
|
||||||
|
self._link_cursor = -1
|
||||||
|
return self
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def getdocument(obj_id: int) -> Document:
|
||||||
|
doc: Document = Document.get(Document.id_document == obj_id)
|
||||||
|
doc.convert()
|
||||||
|
return doc
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def getsection(obj_id: int) -> Section:
|
||||||
|
sec: Section = Section.get(Section.id_rubrique == obj_id)
|
||||||
|
sec.convert(self.lang)
|
||||||
|
return sec
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def getarticle(obj_id: int) -> Article:
|
||||||
|
art: Article = Article.get(Article.id_article == obj_id)
|
||||||
|
art.convert(self.lang)
|
||||||
|
return art
|
||||||
|
|
||||||
|
_obj_getters = getdocument, getdocument, getsection, getarticle
|
||||||
|
|
||||||
|
def __next__(self):
|
||||||
|
self._link_cursor += 1
|
||||||
|
# If we reach end of current link type, pass to the beginning of next
|
||||||
|
if self._link_cursor >= len(self._link_types[self._type_cursor]):
|
||||||
|
self._link_cursor = 0
|
||||||
|
self._type_cursor += 1
|
||||||
|
|
||||||
|
if self._type_cursor >= len(self._link_types):
|
||||||
|
raise StopIteration
|
||||||
|
|
||||||
|
return (
|
||||||
|
self._link_types[self._type_cursor][self._link_cursor],
|
||||||
|
self._obj_getters[self._type_cursor],
|
||||||
|
"!" if self._type_cursor == 0 else "",
|
||||||
|
)
|
||||||
|
|
||||||
|
for link, getobj, prepend in LinkMappings():
|
||||||
|
# LOG.debug(f"Looking for {link} in {text}")
|
||||||
|
for m in link.finditer(text):
|
||||||
|
LOG.debug(f"Found internal link {m.group()} in {self._url_title}")
|
||||||
|
try:
|
||||||
|
LOG.debug(
|
||||||
|
f"Searching for object of id {m.group(2)} with "
|
||||||
|
+ getobj.__name__
|
||||||
|
)
|
||||||
|
o: "Document | Article | Section" = getobj(int(m.group(2)))
|
||||||
|
# TODO get full relative path for sections and articles
|
||||||
|
# TODO rewrite links markup (bold/italic) after stripping
|
||||||
|
if len(m.group(1)) > 0:
|
||||||
|
repl = f"{prepend}[{m.group(1)}]({o.dest_filename()})"
|
||||||
|
else:
|
||||||
|
repl = f"{prepend}[{o._storage_title}]({o.dest_filename()})"
|
||||||
|
LOG.debug(
|
||||||
|
f"Translate link {m.group()} to {repl} in {self._url_title}"
|
||||||
|
)
|
||||||
|
text = text.replace(m.group(), repl)
|
||||||
|
except DoesNotExist:
|
||||||
|
LOG.warn(f"No object for link {m.group()} in {self._url_title}")
|
||||||
|
text = text.replace(m.group(), prepend + "[](NOT FOUND)", 1)
|
||||||
|
return text
|
||||||
|
|
||||||
|
# Get this object url, or none if it’s the same as directory
|
||||||
|
def url(self) -> str:
|
||||||
|
_id: str = str(self._id) + "-" if CFG.prepend_id else ""
|
||||||
|
counter: str = (
|
||||||
|
"_" + str(self._storage_title_append)
|
||||||
|
if self._storage_title_append > 0
|
||||||
|
else ""
|
||||||
|
)
|
||||||
|
# Return none if url will be the same as directory
|
||||||
|
return (
|
||||||
|
self._parenturl
|
||||||
|
+ slugify(_id + self._url_title, max_length=CFG.title_max_length)
|
||||||
|
+ counter
|
||||||
|
+ r"/"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get slugified directory of this object
|
||||||
|
def dest_directory(self) -> str:
|
||||||
|
_id: str = str(self._id) + "-" if CFG.prepend_id else ""
|
||||||
|
counter: str = (
|
||||||
|
"_" + str(self._storage_title_append)
|
||||||
|
if self._storage_title_append > 0
|
||||||
|
else ""
|
||||||
|
)
|
||||||
|
directory: str = self._storage_parentdir + slugify(
|
||||||
|
_id + self._storage_title,
|
||||||
|
max_length=CFG.title_max_length,
|
||||||
|
)
|
||||||
|
return directory + counter + r"/"
|
||||||
|
|
||||||
|
# Get filename of this object
|
||||||
|
def dest_filename(self) -> str:
|
||||||
|
return self._fileprefix + "." + self.lang + "." + CFG.export_filetype
|
||||||
|
|
||||||
|
    def convert_title(self, forced_lang: str) -> None:
        """Initialize _storage_title and _url_title from the raw DB title.

        Translates <multi> blocks (storage language for the on-disk title,
        forced language for the URL title), rewrites internal links, applies
        markup conversion, and raises IgnoredPatternError when either title
        matches a configured ignore pattern.
        """
        LOG.debug(f"Convert title of currently untitled {type(self).__name__}")
        # NOTE(review): guard checks `_title` but this method sets
        # `_url_title`/`_storage_title` — confirm the attribute name is right
        if hasattr(self, "_title"):
            LOG.debug(f"{type(self).__name__} {self._url_title} _title is already set")
            return
        # Guard: missing or empty source title yields an empty URL title
        if self.titre is None:
            LOG.debug(f"{type(self).__name__} title is None")
            self._url_title = ""
            return
        if len(self.titre) == 0:
            LOG.debug(f"{type(self).__name__} title is empty")
            self._url_title = ""
            return
        self._url_title = self.titre.strip()
        # Set storage title to language of storage lang if different
        storage_lang: str = (
            CFG.storage_language if CFG.storage_language is not None else forced_lang
        )
        LOG.debug(
            f"Searching for {storage_lang} in <multi> blocks of `{self._url_title}`"
            + " storage title"
        )
        self._storage_title = self.translate_multi(
            storage_lang,
            self._url_title,
            False,  # don't switch self.lang while translating the storage title
        )
        LOG.debug(
            f"Searching for {forced_lang} in <multi> blocks of `{self._url_title}`"
            + " URL title"
        )
        self._url_title = self.translate_multi(forced_lang, self._url_title)
        LOG.debug(f"Convert internal links of {self.lang} `{self._url_title}` title")
        self._storage_title = self.replace_links(self._storage_title)
        self._url_title = self.replace_links(self._url_title)
        LOG.debug(f"Apply conversions to {self.lang} `{self._url_title}` title")
        self._storage_title = self.convert_field(self._storage_title)
        self._url_title = self.convert_field(self._url_title, CFG.metadata_markup)
        # Abort the export entirely if either title matches an ignore pattern
        for p in CFG.ignore_patterns:
            for title in (self._storage_title, self._url_title):
                m = match(p, title, I)  # case-insensitive match
                if m is not None:
                    raise IgnoredPatternError(
                        f"{self._url_title} matches with ignore pattern {p}, ignoring"
                    )
|
||||||
|
|
||||||
|
def convert_text(self, forced_lang: str) -> None:
|
||||||
|
LOG.debug(f"Convert text of `{self._url_title}`")
|
||||||
|
if hasattr(self, "_text"):
|
||||||
|
LOG.debug(f"{type(self).__name__} {self._url_title} _text is already set")
|
||||||
|
return
|
||||||
|
if self.texte is None:
|
||||||
|
LOG.debug(f"{type(self).__name__} {self._url_title} text is None")
|
||||||
|
self._text = ""
|
||||||
|
return
|
||||||
|
if len(self.texte) == 0:
|
||||||
|
LOG.debug(f"{type(self).__name__} {self._url_title} text is empty")
|
||||||
|
self._text = ""
|
||||||
|
return
|
||||||
|
self._text = self.translate_multi(forced_lang, self.texte.strip())
|
||||||
|
LOG.debug(f"Convert internal links of {self.lang} `{self._url_title}` text")
|
||||||
|
self._text = self.replace_links(self._text)
|
||||||
|
LOG.debug(f"Apply conversions to {self.lang} `{self._url_title}` text")
|
||||||
|
self._text = self.convert_field(self._text)
|
||||||
|
|
||||||
|
def convert_extra(self) -> None:
|
||||||
|
LOG.debug(f"Convert extra of `{self._url_title}`")
|
||||||
|
if hasattr(self, "_extra"):
|
||||||
|
LOG.debug(f"{type(self).__name__} {self._url_title} _extra is already set")
|
||||||
|
return
|
||||||
|
if self.extra is None:
|
||||||
|
LOG.debug(f"{type(self).__name__} {self._url_title} extra is None")
|
||||||
|
self._extra = ""
|
||||||
|
return
|
||||||
|
if len(self.extra) == 0:
|
||||||
|
LOG.debug(f"{type(self).__name__} {self._url_title} extra is empty")
|
||||||
|
self._extra = ""
|
||||||
|
return
|
||||||
|
LOG.debug(f"Convert internal links of {self.lang} `{self._url_title}` extra")
|
||||||
|
self._extra = self.replace_links(self._extra)
|
||||||
|
LOG.debug(f"Apply conversions to {self.lang} `{self._url_title}` extra")
|
||||||
|
self._extra = self.convert_field(self._extra, CFG.metadata_markup)
|
||||||
|
|
||||||
|
def convert_taxonomies(self, forcedlang: str) -> None:
|
||||||
|
self._taxonomies = {}
|
||||||
|
|
||||||
|
for tag in self.taxonomies():
|
||||||
|
taxonomy = str(tag.type)
|
||||||
|
if taxonomy not in CFG.ignore_taxonomies:
|
||||||
|
LOG.debug(
|
||||||
|
f"Translate taxonomy of `{self._url_title}`: {tag.descriptif}"
|
||||||
|
)
|
||||||
|
if taxonomy in CFG.rename_taxonomies:
|
||||||
|
LOG.debug(
|
||||||
|
f"Rename taxonomy {taxonomy}: {CFG.rename_taxonomies[taxonomy]}"
|
||||||
|
)
|
||||||
|
taxonomy = CFG.rename_taxonomies[taxonomy]
|
||||||
|
if str(taxonomy) in self._taxonomies:
|
||||||
|
self._taxonomies[taxonomy].append(
|
||||||
|
self.convert_field(
|
||||||
|
self.translate_multi(forcedlang, str(tag.descriptif), False)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self._taxonomies[taxonomy] = [
|
||||||
|
self.convert_field(
|
||||||
|
self.translate_multi(forcedlang, str(tag.descriptif), False)
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
LOG.debug(
|
||||||
|
f"After translation, taxonomies of `{self._url_title}`: {self._taxonomies}"
|
||||||
|
)
|
||||||
|
|
||||||
|
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Initialize converted fields, beginning with underscore
        # "oui"/"non" DB flag -> bool: was the language explicitly chosen?
        self._choosen_language = self.langue_choisie == "oui"
|
||||||
|
|
||||||
|
# Get related documents
|
||||||
|
def documents(self) -> tuple[Document]:
|
||||||
|
LOG.debug(f"Initialize documents of `{self._url_title}`")
|
||||||
|
documents = (
|
||||||
|
Document.select()
|
||||||
|
.join(
|
||||||
|
SpipDocumentsLiens,
|
||||||
|
on=(Document.id_document == SpipDocumentsLiens.id_document),
|
||||||
|
)
|
||||||
|
.where(SpipDocumentsLiens.id_objet == self._id)
|
||||||
|
)
|
||||||
|
return documents
|
||||||
|
|
||||||
|
# Get the YAML frontmatter string
|
||||||
|
def frontmatter(self, append: Optional[dict[str, Any]] = None) -> str:
|
||||||
|
# LOG.debug(f"Write frontmatter of `{self._title}`")
|
||||||
|
meta: dict[str, Any] = {
|
||||||
|
"lang": self.lang,
|
||||||
|
"translationKey": self.id_trad if self.id_trad != 0 else self._id,
|
||||||
|
"title": self._url_title,
|
||||||
|
"publishDate": self.date,
|
||||||
|
"lastmod": self.maj,
|
||||||
|
"draft": self._draft,
|
||||||
|
"description": self._description,
|
||||||
|
}
|
||||||
|
# Add debugging meta if needed
|
||||||
|
if CFG.debug_meta:
|
||||||
|
meta = meta | {
|
||||||
|
"spip_id": self._id,
|
||||||
|
"spip_id_secteur": self.id_secteur,
|
||||||
|
}
|
||||||
|
# Add url if different of directory
|
||||||
|
if self.url() not in self.dest_directory():
|
||||||
|
meta = meta | {"url": self.url()}
|
||||||
|
if append is not None:
|
||||||
|
return dump(meta | append, allow_unicode=True)
|
||||||
|
else:
|
||||||
|
return dump(meta, allow_unicode=True)
|
||||||
|
|
||||||
|
# Get file text content
|
||||||
|
def content(self) -> str:
|
||||||
|
# LOG.debug(f"Write content of `{self._title}`")
|
||||||
|
# Start the content with frontmatter
|
||||||
|
body: str = "---\n" + self.frontmatter() + "---"
|
||||||
|
# Add the title as a Markdown h1
|
||||||
|
if self._url_title is not None and len(self._url_title) > 0 and CFG.prepend_h1:
|
||||||
|
body += "\n\n# " + self._url_title
|
||||||
|
# If there is a text, add the text preceded by two line breaks
|
||||||
|
if len(self._text) > 0:
|
||||||
|
# Remove remaining HTML after & append to body
|
||||||
|
body += "\n\n" + self._text
|
||||||
|
elif not CFG.export_empty:
|
||||||
|
raise DontExportEmptyError
|
||||||
|
# Same with an "extra" section
|
||||||
|
if len(self._extra) > 0:
|
||||||
|
body += "\n\n# EXTRA\n\n" + self._extra
|
||||||
|
return body
|
||||||
|
|
||||||
|
def authors(self) -> tuple[SpipAuteurs, ...]:
|
||||||
|
LOG.debug(f"Initialize authors of `{self._url_title}`")
|
||||||
|
return (
|
||||||
|
SpipAuteurs.select()
|
||||||
|
.join(
|
||||||
|
SpipAuteursLiens,
|
||||||
|
on=(SpipAuteurs.id_auteur == SpipAuteursLiens.id_auteur),
|
||||||
|
)
|
||||||
|
.where(SpipAuteursLiens.id_objet == self._id)
|
||||||
|
)
|
||||||
|
|
||||||
|
def taxonomies(self) -> tuple[SpipMots, ...]:
|
||||||
|
LOG.debug(f"Initialize taxonomies of `{self._url_title}`")
|
||||||
|
return (
|
||||||
|
SpipMots.select()
|
||||||
|
.join(
|
||||||
|
SpipMotsLiens,
|
||||||
|
on=(SpipMots.id_mot == SpipMotsLiens.id_mot),
|
||||||
|
)
|
||||||
|
.where(SpipMotsLiens.id_objet == self._id)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Write all the documents of this object
|
||||||
|
    # Write all the documents of this object
    def write_children(
        self,
        children: tuple[Document] | tuple[Any],
        forcedlang: str,
    ) -> list[str]:
        """Write every child object (documents/articles/sections) of self.

        Children raising an export-cancelling error are skipped and logged
        at debug level. Returns the collected output messages.
        """
        LOG.debug(f"Writing documents of {type(self).__name__} `{self._url_title}`")
        output: list[str] = []
        total = len(children)
        i = 0
        for obj in children:
            try:
                output.append(
                    obj.write_all(
                        self._depth,
                        self.dest_directory(),
                        i,
                        total,
                        forcedlang,
                        self.url(),
                    )
                )
                # The counter advances only on success, so skipped children
                # don't leave gaps in the displayed numbering
                i += 1
            except (
                LangNotFoundError,
                DontExportDraftError,
                DontExportEmptyError,
                IgnoredPatternError,
            ) as err:
                LOG.debug(err)
        return output
|
||||||
|
|
||||||
|
# Write object to output destination
|
||||||
|
    # Write object to output destination
    def write(self) -> str:
        """Write this object's file (plus its static image, if any).

        First creates the destination directory, bumping the name-collision
        counter (_storage_title_append) until a directory compatible with
        this object's fileprefix is found, then writes content() into
        dest_path(). Returns the written path.
        """
        # Make a directory for this object if there isn’t
        # If it cannot for incompatibility, try until it can
        incompatible: bool = True
        while incompatible:
            directory: str = self.dest_directory()
            try:
                mkdir(directory)
                break
            except FileExistsError:
                # If not stated incompatible with the following, will write in this dir
                incompatible = False
                # Create a new directory if write is about to overwrite an existing file
                # or to write into a directory without the same fileprefix
                for file in listdir(directory):
                    if isfile(directory + file):
                        LOG.debug(
                            f"Can {type(self).__name__} `{self.dest_path()}` of prefix "
                            + f"{self._fileprefix} and suffix {CFG.export_filetype}"
                            + f" be written along with `{file}` of prefix "
                            + f"`{file.split('.')[0]}` and suffix {file.split('.')[-1]}"
                            + f"` in {self.dest_directory()}` ?"
                        )
                        # Resolve conflict at first incompatible file encountered
                        if directory + file == self.dest_path() or (
                            file.split(".")[-1] == CFG.export_filetype
                            and file.split(".")[0] != self._fileprefix
                        ):
                            LOG.debug(
                                f"No, incrementing counter of {self.dest_directory()}"
                            )
                            # Bumping the counter changes dest_directory()
                            # on the next loop iteration
                            self._storage_title_append += 1
                            incompatible = True
                            break

        # Write the content of this object into a file named as self.filename()
        with open(self.dest_path(), "w") as f:
            f.write(self.content())
        # Write the eventual static image of this object
        if self._static_img_path:
            copyfile(
                self._static_img_path,
                self.dest_directory() + basename(self._static_img_path),
            )
        return self.dest_path()
|
||||||
|
|
||||||
|
# Append static images based on filename instead of DB to objects texts
|
||||||
|
def append_static_images(self, obj_str: str = "art", load_str: str = "on"):
|
||||||
|
for t in IMG_TYPES:
|
||||||
|
path: str = CFG.data_dir + obj_str + load_str + str(self._id) + "." + t
|
||||||
|
LOG.debug(f"Search static image of `{self._url_title}` at: {path}")
|
||||||
|
if isfile(path):
|
||||||
|
LOG.debug(f"Found static image of `{self._url_title}` at: {path}")
|
||||||
|
# Append static image to content
|
||||||
|
self._text += f"\n\n![]({basename(path)})"
|
||||||
|
# Store it’s path to write it later
|
||||||
|
self._static_img_path = path
|
||||||
|
break
|
||||||
|
|
||||||
|
# Apply post-init conversions and cancel the export if self not of the right lang
|
||||||
|
    # Apply post-init conversions and cancel the export if self not of the right lang
    def convert(self, forced_lang: str) -> None:
        """Run all post-init conversions for this object.

        Raises LangNotFoundError when neither the DB lang nor a <multi>
        translation matches `forced_lang` (translate_multi switches
        self.lang to forced_lang when a translation is found).
        """
        self.convert_title(forced_lang)
        self.convert_text(forced_lang)
        self.convert_extra()
        self.convert_taxonomies(forced_lang)
        # self.lang was only switched if a forced_lang translation was found
        if self.lang != forced_lang:
            raise LangNotFoundError(
                f"`{self._url_title}` lang is {self.lang} instead of the wanted"
                + f" {forced_lang} and it don’t contains"
                + f" {forced_lang} translation in Markup either"
            )
        self.append_static_images()
|
||||||
|
|
||||||
|
|
||||||
|
class Article(SpipRedactional, SpipArticles):
    """A SPIP article, exported as `index.<lang>.<filetype>`."""

    _fileprefix: str = "index"
    _style = (BOLD, YELLOW)  # Articles accent color is yellow

    class Meta:
        table_name: str = "spip_articles"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._id = self.id_article
        # Initialize converted fields beginning with underscore
        self._accept_forum = self.accepter_forum == "oui"
        self._surtitle = self.convert_field(str(self.surtitre))
        self._subtitle = self.convert_field(str(self.soustitre))
        self._caption = self.convert_field(str(self.chapo))
        self._ps = self.convert_field(str(self.ps))
        self._microblog = self.convert_field(str(self.microblog))

    def frontmatter(self, append: Optional[dict[str, Any]] = None) -> str:
        """Article-specific frontmatter merged into the common one."""
        meta: dict[str, Any] = {
            # Article specific
            "summary": self.chapo,
            "surtitle": self.surtitre,
            "subtitle": self.soustitre,
            "date": self.date_redac,
            "authors": [author.nom for author in self.authors()],
        }
        # Add debugging meta if needed
        if CFG.debug_meta:
            meta |= {"spip_id_rubrique": self.id_rubrique}
        if self._taxonomies:
            meta |= self._taxonomies
        if append is not None:
            meta |= append
        return super().frontmatter(meta)

    def content(self) -> str:
        """Common body plus caption, post-scriptum and microblog sections."""
        body: str = super().content()
        # If there is a caption, add the caption followed by a hr
        if self._caption:
            body += "\n\n" + self._caption + "\n\n***"
        if self._ps:
            body += "\n\n# POST-SCRIPTUM\n\n" + self._ps
        if self._microblog:
            body += "\n\n# MICROBLOGGING\n\n" + self._microblog
        return body

    def write_all(
        self,
        parentdepth: int,
        storage_parentdir: str,
        index: int,
        total: int,
        forced_lang: str,
        parenturl: str,
    ) -> DeepDict:
        """Convert then write this article and its attached documents."""
        self.convert(forced_lang)
        return {
            "msg": super().write_all(
                parentdepth, storage_parentdir, index, total, parenturl
            ),
            "documents": self.write_children(self.documents(), forced_lang),
        }
|
||||||
|
|
||||||
|
|
||||||
|
class Section(SpipRedactional, SpipRubriques):
    """A SPIP section (rubrique), exported as `_index.<lang>.<filetype>`."""

    _fileprefix: str = "_index"
    _style = (BOLD, GREEN)  # Sections accent color is green

    class Meta:
        table_name: str = "spip_rubriques"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._id = self.id_rubrique
        self._depth = self.profondeur

    def frontmatter(self, add: Optional[dict[str, Any]] = None) -> str:
        """Section-specific frontmatter merged into the common one."""
        meta: dict[str, Any] = {}
        # Add debugging meta if needed
        if CFG.debug_meta:
            meta |= {
                "spip_id_parent": self.id_parent,
                "spip_profondeur": self.profondeur,
            }
        if add is not None:
            meta |= add
        return super().frontmatter(meta)

    def articles(self, limit: int = 10**6) -> tuple[Article]:
        """Articles of this section, newest first."""
        LOG.debug(f"Initialize articles of `{self._url_title}`")
        return (
            Article.select()
            .where(Article.id_rubrique == self._id)
            .order_by(Article.date.desc())
            .limit(limit)
        )

    def sections(self, limit: int = 10**6) -> tuple["Section"]:
        """Direct subsections of this section, newest first."""
        LOG.debug(f"Initialize subsections of `{self._url_title}`")
        return (
            Section.select()
            .where(Section.id_parent == self._id)
            .order_by(Section.date.desc())
            .limit(limit)
        )

    def write_all(
        self,
        parentdepth: int,
        storage_parentdir: str,
        index: int,
        total: int,
        forced_lang: str,
        parenturl: str = "",
    ) -> DeepDict:
        """Convert then write this section and its whole subtree."""
        self.convert(forced_lang)
        return {
            "msg": super().write_all(
                parentdepth, storage_parentdir, index, total, parenturl
            ),
            "documents": self.write_children(self.documents(), forced_lang),
            "articles": self.write_children(self.articles(), forced_lang),
            "sections": self.write_children(self.sections(), forced_lang),
        }

    def append_static_images(self, obj_str: str = "rub", load_str: str = "on"):
        """Sections use the `rub` filename prefix for static images."""
        super().append_static_images(obj_str, load_str)
|
147
spip2md/lib.py
Normal file
147
spip2md/lib.py
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
"""
|
||||||
|
This file is part of spip2md.
|
||||||
|
Copyright (C) 2023 LCPQ/Guilhem Fauré
|
||||||
|
|
||||||
|
spip2md is free software: you can redistribute it and/or modify it under the terms of
|
||||||
|
the GNU General Public License version 2 as published by the Free Software Foundation.
|
||||||
|
|
||||||
|
spip2md is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||||
|
without even the implied warranty of MERCHANTABILITY or
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
See the GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along with spip2md.
|
||||||
|
If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
from os import makedirs, remove
|
||||||
|
from os.path import isfile
|
||||||
|
from shutil import rmtree
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from spip2md.config import CFG, NAME
|
||||||
|
from spip2md.extended_models import (
|
||||||
|
DeepDict,
|
||||||
|
DontExportDraftError,
|
||||||
|
IgnoredPatternError,
|
||||||
|
LangNotFoundError,
|
||||||
|
Section,
|
||||||
|
)
|
||||||
|
from spip2md.spip_models import DB
|
||||||
|
from spip2md.style import BOLD, esc
|
||||||
|
|
||||||
|
# Define loggers for this file
|
||||||
|
ROOTLOG = logging.getLogger(NAME + ".root")
|
||||||
|
TREELOG = logging.getLogger(NAME + ".tree")
|
||||||
|
# Initialize the database with settings from CFG
|
||||||
|
DB.init(CFG.db, host=CFG.db_host, user=CFG.db_user, password=CFG.db_pass)
|
||||||
|
|
||||||
|
|
||||||
|
# Write the root sections and their subtrees
|
||||||
|
def write_root(parent_dir: str, parent_id: int = 0) -> DeepDict:
    """Export the root sections (children of `parent_id`) and their subtrees.

    Runs one pass per configured export language and returns the nested
    output tree: {"sections": [...]}.
    """
    # Print starting message
    print(
        f"""\
Begin exporting {esc(BOLD)}{CFG.db}@{CFG.db_host}{esc()} SPIP database to plain \
Markdown+YAML files,
into the directory {esc(BOLD)}{parent_dir}{esc()}, \
as database user {esc(BOLD)}{CFG.db_user}{esc()}
"""
    )
    buffer: list[DeepDict] = []  # Define temporary storage for output
    # Write each sections (write their entire subtree) for each export language
    # Language specified in database can differ from markup, se we force a language
    # and remove irrelevant ones at each looping
    for lang in CFG.export_languages:
        ROOTLOG.debug("Initialize root sections")
        # Get all sections of parentID ROOTID
        child_sections: tuple[Section, ...] = (
            Section.select()
            .where(Section.id_parent == parent_id)
            .order_by(Section.date.desc())
        )
        nb: int = len(child_sections)
        for i, s in enumerate(child_sections):
            ROOTLOG.debug(f"Begin exporting {lang} root section {i}/{nb}")
            try:
                # FIX: honor the `parent_dir` parameter (was CFG.output_dir,
                # which silently ignored the argument advertised in the
                # banner printed above)
                buffer.append(s.write_all(-1, parent_dir, i, nb, lang))
            except LangNotFoundError as err:
                ROOTLOG.debug(err)  # Log the message
            except DontExportDraftError as err:  # Will happen if not CFG.export_drafts
                ROOTLOG.debug(err)  # Log the message
            except IgnoredPatternError as err:
                ROOTLOG.debug(err)  # Log the message
            print()  # Break line between level 0 sections in output
            ROOTLOG.debug(
                f"Finished exporting {lang} root section {i}/{nb} {s._url_title}"
            )
    return {"sections": buffer}
|
||||||
|
|
||||||
|
|
||||||
|
# Count on outputted tree & print results if finished
|
||||||
|
def summarize(
    tree: "DeepDict | list[DeepDict] | list[str]",
    depth: int = -1,
    prevkey: Optional[str] = None,
    counter: Optional[dict[str, int]] = None,
) -> dict[str, int]:
    """Recursively count exported objects per key in the output tree.

    Walks `tree`, counting, for every key mapping to a list, how many
    elements appear under that key anywhere in the tree. When called at
    the root (default `depth` of -1), also prints human-readable totals
    and a pointer to the log file.

    :param tree: output tree as returned by write_root()
    :param depth: recursion depth; -1 means this is the root call
    :param prevkey: key of the list currently being counted
    :param counter: accumulator mapping keys to counts, shared by recursion
    :return: the filled `counter`
    """
    if counter is None:
        counter = {}
    # Use isinstance instead of `type(x) ==` (handles subclasses, idiomatic);
    # a value cannot be both a dict and a list, so the branches are exclusive
    if isinstance(tree, dict):
        for key, sub in tree.items():
            if isinstance(sub, list):
                counter = summarize(sub, depth + 1, key, counter)
            # if sub is a str, it’s just the name, don’t count
    elif isinstance(tree, list):
        for sub in tree:
            if prevkey is not None:
                # Count every element of the list under its parent key
                counter[prevkey] = counter.get(prevkey, 0) + 1
            if isinstance(sub, dict):
                counter = summarize(sub, depth + 1, None, counter)

    # End message only if it’s the root call
    if depth == -1:
        TREELOG.debug(tree)
        totals: str = ", ".join(
            f"{esc(BOLD)}{val}{esc()} {key}" for key, val in counter.items()
        )
        print(f"Exported a total of {totals}")
        # Warn about issued warnings in log file
        if isfile(CFG.logfile):
            print(
                f"Logging level was set to {esc(BOLD)}{CFG.loglevel}{esc()}, there are"
                + f" warnings and informations in {esc(BOLD)}{CFG.logfile}{esc()}"
            )
    return counter
|
||||||
|
|
||||||
|
|
||||||
|
# Clear the previous log file if needed, then configure logging
|
||||||
|
def init_logging(**kwargs) -> None:
    """Configure file logging per CFG, optionally wiping the previous log.

    Extra keyword arguments are forwarded to logging.basicConfig().
    """
    # Start from a fresh log file when the configuration asks for it
    if CFG.clear_log and isfile(CFG.logfile):
        remove(CFG.logfile)

    logging.basicConfig(
        filename=CFG.logfile,
        encoding="utf-8",
        level=CFG.loglevel,
        **kwargs,
    )
|
||||||
|
|
||||||
|
# return logging.getLogger(CFG.logname)
|
||||||
|
|
||||||
|
|
||||||
|
# Clear the output dir if needed & create a new
|
||||||
|
def clear_output() -> None:
    """Optionally wipe CFG.output_dir, then make sure it exists."""
    out_dir = CFG.output_dir
    if CFG.clear_output:
        # ignore_errors=True: fine if the directory does not exist yet
        rmtree(out_dir, ignore_errors=True)
    makedirs(out_dir, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
# When directly executed as a script
|
||||||
|
def cli():
    """Command-line entry point: prepare logging & output dir, then export."""
    init_logging()  # Initialize logging and logfile
    clear_output()  # Eventually remove already existing output dir

    # Connect to the database where the SPIP site is stored for the duration
    # of this block, write everything & print the result human-readably
    with DB:
        exported = write_root(CFG.output_dir)
        summarize(exported)
|
@ -16,7 +16,7 @@ If not, see <https://www.gnu.org/licenses/>.
|
|||||||
# pyright: strict
|
# pyright: strict
|
||||||
from re import I, S, compile
|
from re import I, S, compile
|
||||||
|
|
||||||
LANGCODES = ("en", "fr", "es") # And more …
|
from spip2md.config import CFG
|
||||||
|
|
||||||
# ((SPIP syntax, Replacement Markdown syntax), …)
|
# ((SPIP syntax, Replacement Markdown syntax), …)
|
||||||
SPIP_MARKDOWN = (
|
SPIP_MARKDOWN = (
|
||||||
@ -167,7 +167,7 @@ SECTION_LINK = (
|
|||||||
MULTILANG_BLOCK = compile(r"<multi>(.+?)<\/multi>", S | I)
|
MULTILANG_BLOCK = compile(r"<multi>(.+?)<\/multi>", S | I)
|
||||||
CONFIG_LANGS = { # lang of configuration: (match against this lang)
|
CONFIG_LANGS = { # lang of configuration: (match against this lang)
|
||||||
lang: compile(r"\[ *" + lang + r" *\]\s*(.+?)\s*(?=\[[a-zA-Z\-]{2,6}\]|$)", S | I)
|
lang: compile(r"\[ *" + lang + r" *\]\s*(.+?)\s*(?=\[[a-zA-Z\-]{2,6}\]|$)", S | I)
|
||||||
for lang in LANGCODES
|
for lang in CFG.export_languages
|
||||||
}
|
}
|
||||||
# MULTILANGS = compile( # Matches against all langs of multi blocks
|
# MULTILANGS = compile( # Matches against all langs of multi blocks
|
||||||
# r"\[([a-zA-Z\-]{2,6})\]\s*(.+?)\s*(?=\[[a-zA-Z\-]{2,6}\]|$)", S | I
|
# r"\[([a-zA-Z\-]{2,6})\]\s*(.+?)\s*(?=\[[a-zA-Z\-]{2,6}\]|$)", S | I
|
||||||
@ -316,7 +316,10 @@ SPECIAL_OUTPUT = (
|
|||||||
compile(r"^([0-9]+?\.)(?= )"), # Counter
|
compile(r"^([0-9]+?\.)(?= )"), # Counter
|
||||||
compile(r"(?<= )(->)(?= )"), # Arrow
|
compile(r"(?<= )(->)(?= )"), # Arrow
|
||||||
compile(r"(?<=^Exporting )([0-9]+?)(?= )"), # Total
|
compile(r"(?<=^Exporting )([0-9]+?)(?= )"), # Total
|
||||||
) + tuple(compile(r"(?<=level [0-9] )(" + language + r" )") for language in LANGCODES)
|
) + tuple(
|
||||||
|
compile(r"(?<=level [0-9] )(" + language + r" )")
|
||||||
|
for language in CFG.export_languages
|
||||||
|
)
|
||||||
|
|
||||||
# Warning elements in terminal output to highlight
|
# Warning elements in terminal output to highlight
|
||||||
WARNING_OUTPUT = (
|
WARNING_OUTPUT = (
|
||||||
|
@ -12,9 +12,6 @@ See the GNU General Public License for more details.
|
|||||||
|
|
||||||
You should have received a copy of the GNU General Public License along with spip2md.
|
You should have received a copy of the GNU General Public License along with spip2md.
|
||||||
If not, see <https://www.gnu.org/licenses/>.
|
If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
This file defines Spip database as objects, attributes and methods with Peewee
|
|
||||||
"""
|
"""
|
||||||
# type: ignore
|
# type: ignore
|
||||||
from peewee import (
|
from peewee import (
|
||||||
@ -32,12 +29,17 @@ from peewee import (
|
|||||||
TextField,
|
TextField,
|
||||||
)
|
)
|
||||||
|
|
||||||
DB: MySQLDatabase = MySQLDatabase(None)
|
DB = MySQLDatabase(None)
|
||||||
|
|
||||||
|
|
||||||
|
# class UnknownField(object):
|
||||||
|
# def __init__(self, *_, **__):
|
||||||
|
# pass
|
||||||
|
|
||||||
|
|
||||||
class BaseModel(Model):
|
class BaseModel(Model):
|
||||||
class Meta:
|
class Meta:
|
||||||
database = DB
|
database: MySQLDatabase = DB
|
||||||
|
|
||||||
|
|
||||||
class SpipArticles(BaseModel):
|
class SpipArticles(BaseModel):
|
||||||
@ -72,7 +74,7 @@ class SpipArticles(BaseModel):
|
|||||||
visites = IntegerField(constraints=[SQL("DEFAULT 0")])
|
visites = IntegerField(constraints=[SQL("DEFAULT 0")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_articles"
|
table_name: str = "spip_articles"
|
||||||
indexes = ((("statut", "date"), False),)
|
indexes = ((("statut", "date"), False),)
|
||||||
|
|
||||||
|
|
||||||
@ -105,7 +107,7 @@ class SpipAuteurs(BaseModel):
|
|||||||
webmestre = CharField(constraints=[SQL("DEFAULT 'non'")])
|
webmestre = CharField(constraints=[SQL("DEFAULT 'non'")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_auteurs"
|
table_name: str = "spip_auteurs"
|
||||||
|
|
||||||
|
|
||||||
class SpipAuteursLiens(BaseModel):
|
class SpipAuteursLiens(BaseModel):
|
||||||
@ -115,7 +117,7 @@ class SpipAuteursLiens(BaseModel):
|
|||||||
vu = CharField(constraints=[SQL("DEFAULT 'non'")])
|
vu = CharField(constraints=[SQL("DEFAULT 'non'")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_auteurs_liens"
|
table_name: str = "spip_auteurs_liens"
|
||||||
indexes = ((("id_auteur", "id_objet", "objet"), True),)
|
indexes = ((("id_auteur", "id_objet", "objet"), True),)
|
||||||
primary_key = CompositeKey("id_auteur", "id_objet", "objet")
|
primary_key = CompositeKey("id_auteur", "id_objet", "objet")
|
||||||
|
|
||||||
@ -135,7 +137,7 @@ class SpipBreves(BaseModel):
|
|||||||
titre = TextField()
|
titre = TextField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_breves"
|
table_name: str = "spip_breves"
|
||||||
|
|
||||||
|
|
||||||
class SpipDepots(BaseModel):
|
class SpipDepots(BaseModel):
|
||||||
@ -155,7 +157,7 @@ class SpipDepots(BaseModel):
|
|||||||
xml_paquets = CharField(constraints=[SQL("DEFAULT ''")])
|
xml_paquets = CharField(constraints=[SQL("DEFAULT ''")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_depots"
|
table_name: str = "spip_depots"
|
||||||
|
|
||||||
|
|
||||||
class SpipDepotsPlugins(BaseModel):
|
class SpipDepotsPlugins(BaseModel):
|
||||||
@ -163,7 +165,7 @@ class SpipDepotsPlugins(BaseModel):
|
|||||||
id_plugin = BigIntegerField()
|
id_plugin = BigIntegerField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_depots_plugins"
|
table_name: str = "spip_depots_plugins"
|
||||||
indexes = ((("id_depot", "id_plugin"), True),)
|
indexes = ((("id_depot", "id_plugin"), True),)
|
||||||
primary_key = CompositeKey("id_depot", "id_plugin")
|
primary_key = CompositeKey("id_depot", "id_plugin")
|
||||||
|
|
||||||
@ -189,7 +191,7 @@ class SpipDocuments(BaseModel):
|
|||||||
titre = TextField()
|
titre = TextField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_documents"
|
table_name: str = "spip_documents"
|
||||||
|
|
||||||
|
|
||||||
class SpipDocumentsLiens(BaseModel):
|
class SpipDocumentsLiens(BaseModel):
|
||||||
@ -199,7 +201,7 @@ class SpipDocumentsLiens(BaseModel):
|
|||||||
vu = CharField(constraints=[SQL("DEFAULT 'non'")])
|
vu = CharField(constraints=[SQL("DEFAULT 'non'")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_documents_liens"
|
table_name: str = "spip_documents_liens"
|
||||||
indexes = ((("id_document", "id_objet", "objet"), True),)
|
indexes = ((("id_document", "id_objet", "objet"), True),)
|
||||||
primary_key = CompositeKey("id_document", "id_objet", "objet")
|
primary_key = CompositeKey("id_document", "id_objet", "objet")
|
||||||
|
|
||||||
@ -229,7 +231,7 @@ class SpipEvenements(BaseModel):
|
|||||||
titre = TextField()
|
titre = TextField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_evenements"
|
table_name: str = "spip_evenements"
|
||||||
|
|
||||||
|
|
||||||
class SpipEvenementsParticipants(BaseModel):
|
class SpipEvenementsParticipants(BaseModel):
|
||||||
@ -242,7 +244,7 @@ class SpipEvenementsParticipants(BaseModel):
|
|||||||
reponse = CharField(constraints=[SQL("DEFAULT '?'")])
|
reponse = CharField(constraints=[SQL("DEFAULT '?'")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_evenements_participants"
|
table_name: str = "spip_evenements_participants"
|
||||||
|
|
||||||
|
|
||||||
class SpipForum(BaseModel):
|
class SpipForum(BaseModel):
|
||||||
@ -265,7 +267,7 @@ class SpipForum(BaseModel):
|
|||||||
url_site = TextField()
|
url_site = TextField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_forum"
|
table_name: str = "spip_forum"
|
||||||
indexes = ((("statut", "id_parent", "id_objet", "objet", "date_heure"), False),)
|
indexes = ((("statut", "id_parent", "id_objet", "objet", "date_heure"), False),)
|
||||||
|
|
||||||
|
|
||||||
@ -283,7 +285,7 @@ class SpipGroupesMots(BaseModel):
|
|||||||
unseul = CharField(constraints=[SQL("DEFAULT ''")])
|
unseul = CharField(constraints=[SQL("DEFAULT ''")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_groupes_mots"
|
table_name: str = "spip_groupes_mots"
|
||||||
|
|
||||||
|
|
||||||
class SpipJobs(BaseModel):
|
class SpipJobs(BaseModel):
|
||||||
@ -298,7 +300,7 @@ class SpipJobs(BaseModel):
|
|||||||
status = IntegerField(constraints=[SQL("DEFAULT 1")], index=True)
|
status = IntegerField(constraints=[SQL("DEFAULT 1")], index=True)
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_jobs"
|
table_name: str = "spip_jobs"
|
||||||
|
|
||||||
|
|
||||||
class SpipJobsLiens(BaseModel):
|
class SpipJobsLiens(BaseModel):
|
||||||
@ -307,7 +309,7 @@ class SpipJobsLiens(BaseModel):
|
|||||||
objet = CharField(constraints=[SQL("DEFAULT ''")])
|
objet = CharField(constraints=[SQL("DEFAULT ''")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_jobs_liens"
|
table_name: str = "spip_jobs_liens"
|
||||||
indexes = ((("id_job", "id_objet", "objet"), True),)
|
indexes = ((("id_job", "id_objet", "objet"), True),)
|
||||||
primary_key = CompositeKey("id_job", "id_objet", "objet")
|
primary_key = CompositeKey("id_job", "id_objet", "objet")
|
||||||
|
|
||||||
@ -321,7 +323,7 @@ class SpipMeslettres(BaseModel):
|
|||||||
url_txt = CharField()
|
url_txt = CharField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_meslettres"
|
table_name: str = "spip_meslettres"
|
||||||
|
|
||||||
|
|
||||||
class SpipMessages(BaseModel):
|
class SpipMessages(BaseModel):
|
||||||
@ -338,7 +340,7 @@ class SpipMessages(BaseModel):
|
|||||||
type = CharField(constraints=[SQL("DEFAULT ''")])
|
type = CharField(constraints=[SQL("DEFAULT ''")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_messages"
|
table_name: str = "spip_messages"
|
||||||
|
|
||||||
|
|
||||||
class SpipMeta(BaseModel):
|
class SpipMeta(BaseModel):
|
||||||
@ -348,7 +350,7 @@ class SpipMeta(BaseModel):
|
|||||||
valeur = TextField(null=True)
|
valeur = TextField(null=True)
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_meta"
|
table_name: str = "spip_meta"
|
||||||
|
|
||||||
|
|
||||||
class SpipMots(BaseModel):
|
class SpipMots(BaseModel):
|
||||||
@ -362,7 +364,7 @@ class SpipMots(BaseModel):
|
|||||||
type = TextField()
|
type = TextField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_mots"
|
table_name: str = "spip_mots"
|
||||||
|
|
||||||
|
|
||||||
class SpipMotsLiens(BaseModel):
|
class SpipMotsLiens(BaseModel):
|
||||||
@ -371,7 +373,7 @@ class SpipMotsLiens(BaseModel):
|
|||||||
objet = CharField(constraints=[SQL("DEFAULT ''")], index=True)
|
objet = CharField(constraints=[SQL("DEFAULT ''")], index=True)
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_mots_liens"
|
table_name: str = "spip_mots_liens"
|
||||||
indexes = ((("id_mot", "id_objet", "objet"), True),)
|
indexes = ((("id_mot", "id_objet", "objet"), True),)
|
||||||
primary_key = CompositeKey("id_mot", "id_objet", "objet")
|
primary_key = CompositeKey("id_mot", "id_objet", "objet")
|
||||||
|
|
||||||
@ -384,7 +386,7 @@ class SpipOrthoCache(BaseModel):
|
|||||||
suggest = TextField()
|
suggest = TextField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_ortho_cache"
|
table_name: str = "spip_ortho_cache"
|
||||||
indexes = ((("lang", "mot"), True),)
|
indexes = ((("lang", "mot"), True),)
|
||||||
primary_key = CompositeKey("lang", "mot")
|
primary_key = CompositeKey("lang", "mot")
|
||||||
|
|
||||||
@ -396,7 +398,7 @@ class SpipOrthoDico(BaseModel):
|
|||||||
mot = CharField()
|
mot = CharField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_ortho_dico"
|
table_name: str = "spip_ortho_dico"
|
||||||
indexes = ((("lang", "mot"), True),)
|
indexes = ((("lang", "mot"), True),)
|
||||||
primary_key = CompositeKey("lang", "mot")
|
primary_key = CompositeKey("lang", "mot")
|
||||||
|
|
||||||
@ -441,7 +443,7 @@ class SpipPaquets(BaseModel):
|
|||||||
version_base = CharField(constraints=[SQL("DEFAULT ''")])
|
version_base = CharField(constraints=[SQL("DEFAULT ''")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_paquets"
|
table_name: str = "spip_paquets"
|
||||||
|
|
||||||
|
|
||||||
class SpipPetitions(BaseModel):
|
class SpipPetitions(BaseModel):
|
||||||
@ -456,7 +458,7 @@ class SpipPetitions(BaseModel):
|
|||||||
texte = TextField()
|
texte = TextField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_petitions"
|
table_name: str = "spip_petitions"
|
||||||
|
|
||||||
|
|
||||||
class SpipPlugins(BaseModel):
|
class SpipPlugins(BaseModel):
|
||||||
@ -473,7 +475,7 @@ class SpipPlugins(BaseModel):
|
|||||||
vmax = CharField(constraints=[SQL("DEFAULT ''")])
|
vmax = CharField(constraints=[SQL("DEFAULT ''")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_plugins"
|
table_name: str = "spip_plugins"
|
||||||
|
|
||||||
|
|
||||||
class SpipReferers(BaseModel):
|
class SpipReferers(BaseModel):
|
||||||
@ -486,7 +488,7 @@ class SpipReferers(BaseModel):
|
|||||||
visites_veille = IntegerField(constraints=[SQL("DEFAULT 0")])
|
visites_veille = IntegerField(constraints=[SQL("DEFAULT 0")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_referers"
|
table_name: str = "spip_referers"
|
||||||
|
|
||||||
|
|
||||||
class SpipReferersArticles(BaseModel):
|
class SpipReferersArticles(BaseModel):
|
||||||
@ -497,7 +499,7 @@ class SpipReferersArticles(BaseModel):
|
|||||||
visites = IntegerField()
|
visites = IntegerField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_referers_articles"
|
table_name: str = "spip_referers_articles"
|
||||||
indexes = ((("id_article", "referer_md5"), True),)
|
indexes = ((("id_article", "referer_md5"), True),)
|
||||||
primary_key = CompositeKey("id_article", "referer_md5")
|
primary_key = CompositeKey("id_article", "referer_md5")
|
||||||
|
|
||||||
@ -511,7 +513,7 @@ class SpipResultats(BaseModel):
|
|||||||
table_objet = CharField(constraints=[SQL("DEFAULT ''")])
|
table_objet = CharField(constraints=[SQL("DEFAULT ''")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_resultats"
|
table_name: str = "spip_resultats"
|
||||||
primary_key = False
|
primary_key = False
|
||||||
|
|
||||||
|
|
||||||
@ -535,7 +537,7 @@ class SpipRubriques(BaseModel):
|
|||||||
titre = TextField()
|
titre = TextField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_rubriques"
|
table_name: str = "spip_rubriques"
|
||||||
|
|
||||||
|
|
||||||
class SpipSignatures(BaseModel):
|
class SpipSignatures(BaseModel):
|
||||||
@ -551,7 +553,7 @@ class SpipSignatures(BaseModel):
|
|||||||
url_site = TextField()
|
url_site = TextField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_signatures"
|
table_name: str = "spip_signatures"
|
||||||
|
|
||||||
|
|
||||||
class SpipSyndic(BaseModel):
|
class SpipSyndic(BaseModel):
|
||||||
@ -575,7 +577,7 @@ class SpipSyndic(BaseModel):
|
|||||||
url_syndic = TextField()
|
url_syndic = TextField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_syndic"
|
table_name: str = "spip_syndic"
|
||||||
indexes = ((("statut", "date_syndic"), False),)
|
indexes = ((("statut", "date_syndic"), False),)
|
||||||
|
|
||||||
|
|
||||||
@ -595,14 +597,14 @@ class SpipSyndicArticles(BaseModel):
|
|||||||
url_source = TextField()
|
url_source = TextField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_syndic_articles"
|
table_name: str = "spip_syndic_articles"
|
||||||
|
|
||||||
|
|
||||||
class SpipTest(BaseModel):
|
class SpipTest(BaseModel):
|
||||||
a = IntegerField(null=True)
|
a = IntegerField(null=True)
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_test"
|
table_name: str = "spip_test"
|
||||||
primary_key = False
|
primary_key = False
|
||||||
|
|
||||||
|
|
||||||
@ -617,7 +619,7 @@ class SpipTypesDocuments(BaseModel):
|
|||||||
upload = CharField(constraints=[SQL("DEFAULT 'oui'")])
|
upload = CharField(constraints=[SQL("DEFAULT 'oui'")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_types_documents"
|
table_name: str = "spip_types_documents"
|
||||||
|
|
||||||
|
|
||||||
class SpipUrls(BaseModel):
|
class SpipUrls(BaseModel):
|
||||||
@ -630,7 +632,7 @@ class SpipUrls(BaseModel):
|
|||||||
url = CharField()
|
url = CharField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_urls"
|
table_name: str = "spip_urls"
|
||||||
indexes = (
|
indexes = (
|
||||||
(("id_parent", "url"), True),
|
(("id_parent", "url"), True),
|
||||||
(("type", "id_objet"), False),
|
(("type", "id_objet"), False),
|
||||||
@ -649,7 +651,7 @@ class SpipVersions(BaseModel):
|
|||||||
titre_version = TextField()
|
titre_version = TextField()
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_versions"
|
table_name: str = "spip_versions"
|
||||||
indexes = ((("id_version", "id_objet", "objet"), True),)
|
indexes = ((("id_version", "id_objet", "objet"), True),)
|
||||||
primary_key = CompositeKey("id_objet", "id_version", "objet")
|
primary_key = CompositeKey("id_objet", "id_version", "objet")
|
||||||
|
|
||||||
@ -664,7 +666,7 @@ class SpipVersionsFragments(BaseModel):
|
|||||||
version_min = IntegerField(constraints=[SQL("DEFAULT 0")])
|
version_min = IntegerField(constraints=[SQL("DEFAULT 0")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_versions_fragments"
|
table_name: str = "spip_versions_fragments"
|
||||||
indexes = ((("id_objet", "objet", "id_fragment", "version_min"), True),)
|
indexes = ((("id_objet", "objet", "id_fragment", "version_min"), True),)
|
||||||
primary_key = CompositeKey("id_fragment", "id_objet", "objet", "version_min")
|
primary_key = CompositeKey("id_fragment", "id_objet", "objet", "version_min")
|
||||||
|
|
||||||
@ -675,7 +677,7 @@ class SpipVisites(BaseModel):
|
|||||||
visites = IntegerField(constraints=[SQL("DEFAULT 0")])
|
visites = IntegerField(constraints=[SQL("DEFAULT 0")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_visites"
|
table_name: str = "spip_visites"
|
||||||
|
|
||||||
|
|
||||||
class SpipVisitesArticles(BaseModel):
|
class SpipVisitesArticles(BaseModel):
|
||||||
@ -685,7 +687,7 @@ class SpipVisitesArticles(BaseModel):
|
|||||||
visites = IntegerField(constraints=[SQL("DEFAULT 0")])
|
visites = IntegerField(constraints=[SQL("DEFAULT 0")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_visites_articles"
|
table_name: str = "spip_visites_articles"
|
||||||
indexes = ((("date", "id_article"), True),)
|
indexes = ((("date", "id_article"), True),)
|
||||||
primary_key = CompositeKey("date", "id_article")
|
primary_key = CompositeKey("date", "id_article")
|
||||||
|
|
||||||
@ -699,7 +701,7 @@ class SpipZones(BaseModel):
|
|||||||
titre = CharField(constraints=[SQL("DEFAULT ''")])
|
titre = CharField(constraints=[SQL("DEFAULT ''")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_zones"
|
table_name: str = "spip_zones"
|
||||||
|
|
||||||
|
|
||||||
class SpipZonesLiens(BaseModel):
|
class SpipZonesLiens(BaseModel):
|
||||||
@ -708,11 +710,6 @@ class SpipZonesLiens(BaseModel):
|
|||||||
objet = CharField(constraints=[SQL("DEFAULT ''")])
|
objet = CharField(constraints=[SQL("DEFAULT ''")])
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "spip_zones_liens"
|
table_name: str = "spip_zones_liens"
|
||||||
indexes = ((("id_zone", "id_objet", "objet"), True),)
|
indexes = ((("id_zone", "id_objet", "objet"), True),)
|
||||||
primary_key = CompositeKey("id_objet", "id_zone", "objet")
|
primary_key = CompositeKey("id_objet", "id_zone", "objet")
|
||||||
|
|
||||||
|
|
||||||
# class UnknownField(object):
|
|
||||||
# def __init__(self, *_, **__):
|
|
||||||
# pass
|
|
||||||
|
@ -12,19 +12,7 @@ See the GNU General Public License for more details.
|
|||||||
|
|
||||||
You should have received a copy of the GNU General Public License along with spip2md.
|
You should have received a copy of the GNU General Public License along with spip2md.
|
||||||
If not, see <https://www.gnu.org/licenses/>.
|
If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
This file contains functions needed to control this package from command line and to
|
|
||||||
define a printable classes which adds terminal printing capabilites to Spip objects
|
|
||||||
"""
|
"""
|
||||||
# pyright: strict
|
|
||||||
from os import makedirs
|
|
||||||
from shutil import rmtree
|
|
||||||
|
|
||||||
from spip2md.config import Configuration
|
|
||||||
from spip2md.spip_models import DB
|
|
||||||
from spip2md.write import WritableSite
|
|
||||||
|
|
||||||
# Define styles for terminal printing
|
# Define styles for terminal printing
|
||||||
BOLD = 1 # Bold
|
BOLD = 1 # Bold
|
||||||
ITALIC = 3 # Italic
|
ITALIC = 3 # Italic
|
||||||
@ -52,31 +40,3 @@ def esc(*args: int) -> str:
|
|||||||
params += str(a) + ";"
|
params += str(a) + ";"
|
||||||
# Base terminal escape sequence that needs to be closed by "m"
|
# Base terminal escape sequence that needs to be closed by "m"
|
||||||
return "\033[" + params[:-1] + "m"
|
return "\033[" + params[:-1] + "m"
|
||||||
|
|
||||||
|
|
||||||
# Extend Site class to add terminal output capabilities
|
|
||||||
class PrintableSite(WritableSite):
|
|
||||||
def write(self) -> str:
|
|
||||||
return "write path"
|
|
||||||
|
|
||||||
|
|
||||||
# Initialize DB database connection from config
|
|
||||||
def init_db(cfg: Configuration):
|
|
||||||
DB.init( # type: ignore
|
|
||||||
cfg.db, host=cfg.db_host, user=cfg.db_user, password=cfg.db_pass
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def main(*argv: str):
|
|
||||||
cfg = Configuration(*argv) # Get the configuration
|
|
||||||
|
|
||||||
init_db(cfg)
|
|
||||||
|
|
||||||
# Eventually remove already existing output dir
|
|
||||||
if cfg.clear_output:
|
|
||||||
rmtree(cfg.output_dir, True)
|
|
||||||
makedirs(cfg.output_dir, exist_ok=True)
|
|
||||||
|
|
||||||
with DB: # Connect to the database where SPIP site is stored in this block
|
|
||||||
# Write everything while printing the output human-readably
|
|
||||||
PrintableSite(cfg).write()
|
|
@ -1,16 +0,0 @@
|
|||||||
# File for test purposes, mainly in interactive python
|
|
||||||
from spip2md.config import Configuration
|
|
||||||
from spip2md.convert import (
|
|
||||||
ConvertableSite,
|
|
||||||
)
|
|
||||||
from spip2md.spip_models import DB
|
|
||||||
|
|
||||||
cfg = Configuration() # Get the configuration
|
|
||||||
|
|
||||||
DB.init( # type: ignore
|
|
||||||
cfg.db, host=cfg.db_host, user=cfg.db_user, password=cfg.db_pass
|
|
||||||
)
|
|
||||||
|
|
||||||
SITE = ConvertableSite(cfg)
|
|
||||||
|
|
||||||
ID = ("document", 1293)
|
|
@ -1,24 +0,0 @@
|
|||||||
"""
|
|
||||||
This file is part of spip2md.
|
|
||||||
Copyright (C) 2023 LCPQ/Guilhem Fauré
|
|
||||||
|
|
||||||
spip2md is free software: you can redistribute it and/or modify it under the terms of
|
|
||||||
the GNU General Public License version 2 as published by the Free Software Foundation.
|
|
||||||
|
|
||||||
spip2md is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
|
||||||
without even the implied warranty of MERCHANTABILITY or
|
|
||||||
FITNESS FOR A PARTICULAR PURPOSE.
|
|
||||||
See the GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along with spip2md.
|
|
||||||
If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
|
|
||||||
|
|
||||||
This file adds write to disk capabilities to spip objects
|
|
||||||
"""
|
|
||||||
from spip2md.convert import ConvertableSite
|
|
||||||
|
|
||||||
|
|
||||||
class WritableSite(ConvertableSite):
|
|
||||||
def write(self) -> str:
|
|
||||||
return "write path"
|
|
Loading…
Reference in New Issue
Block a user