Better handling of filename conflicts and of the differences between storage path and URL; code cleanup; removed the SpipInterface class
This commit is contained in:
parent
be4b5166d7
commit
df7e0df1cf
@ -31,14 +31,14 @@ class Configuration:
|
|||||||
unknown_char_replacement: str = "??" # Replaces unknown characters
|
unknown_char_replacement: str = "??" # Replaces unknown characters
|
||||||
clear_log: bool = True # Clear log before every run instead of appending to
|
clear_log: bool = True # Clear log before every run instead of appending to
|
||||||
clear_output: bool = True # Remove eventual output dir before running
|
clear_output: bool = True # Remove eventual output dir before running
|
||||||
conflict_strategy: str = "prepend id" # Prepend or append : date, id or counter
|
|
||||||
ignore_pattern: list[str] = [] # Ignore objects of which title match
|
ignore_pattern: list[str] = [] # Ignore objects of which title match
|
||||||
logfile: str = "log-spip2md.log" # File where logs will be written, relative to wd
|
logfile: str = "log-spip2md.log" # File where logs will be written, relative to wd
|
||||||
loglevel: str = "WARNING" # Minimum criticity of logs written in logfile
|
loglevel: str = "WARNING" # Minimum criticity of logs written in logfile
|
||||||
logname: str = "spip2md" # Labelling of logs
|
logname: str = "spip2md" # Labelling of logs
|
||||||
export_filetype: str = "md" # Extension of exported text files
|
export_filetype: str = "md" # Extension of exported text files
|
||||||
max_articles_export: int = 1000 # TODO reimplement
|
title_max_length: int = 40 # Maximum length of a single title for directory names
|
||||||
max_sections_export: int = 500 # TODO reimplement
|
# max_articles_export: int = 1000 # TODO reimplement
|
||||||
|
# max_sections_export: int = 500 # TODO reimplement
|
||||||
|
|
||||||
def __init__(self, config_file: Optional[str] = None):
|
def __init__(self, config_file: Optional[str] = None):
|
||||||
if config_file is not None:
|
if config_file is not None:
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
# SPIP website to plain Markdown files converter, Copyright (C) 2023 Guilhem Fauré
|
# SPIP website to plain Markdown files converter, Copyright (C) 2023 Guilhem Fauré
|
||||||
import logging
|
import logging
|
||||||
from os import listdir, makedirs
|
from os import listdir, mkdir
|
||||||
from os.path import basename, isfile, splitext
|
from os.path import basename, isfile, splitext
|
||||||
from re import I, Match, Pattern, finditer, match, search
|
from re import I, Match, Pattern, finditer, match, search
|
||||||
from shutil import copyfile
|
from shutil import copyfile
|
||||||
@ -50,7 +50,7 @@ DeepDict = dict[str, "list[DeepDict] | list[str] | str"]
|
|||||||
LOG = logging.getLogger(CFG.logname + ".models")
|
LOG = logging.getLogger(CFG.logname + ".models")
|
||||||
|
|
||||||
|
|
||||||
class SpipInterface:
|
class WritableObject:
|
||||||
# From SPIP database
|
# From SPIP database
|
||||||
texte: str
|
texte: str
|
||||||
lang: str
|
lang: str
|
||||||
@ -59,34 +59,16 @@ class SpipInterface:
|
|||||||
statut: str
|
statut: str
|
||||||
profondeur: int
|
profondeur: int
|
||||||
# Converted fields
|
# Converted fields
|
||||||
_title: str
|
_storage_title: str # Title with which directories names are built
|
||||||
_draft: bool
|
_draft: bool
|
||||||
# Additional fields
|
# Additional fields
|
||||||
_id: BigAutoField | int = 0 # same ID attribute name for all objects
|
_id: BigAutoField | int = 0 # same ID attribute name for all objects
|
||||||
# _id: BigIntegerField | int = 0 # same ID attribute name for all objects
|
|
||||||
# _depth: IntegerField | int # Equals `profondeur` for sections
|
|
||||||
_depth: int # Equals `profondeur` for sections
|
_depth: int # Equals `profondeur` for sections
|
||||||
_fileprefix: str # String to prepend to written files
|
_fileprefix: str # String to prepend to written files
|
||||||
_parentdir: str # Path from output dir to direct parent
|
_storage_parentdir: str # Path from output dir to direct parent
|
||||||
_storage_parentdir: Optional[str] = None
|
|
||||||
_storage_title: Optional[str] = None
|
|
||||||
_url: Optional[str] = None # In case URL in frontmatter different of dest dir
|
|
||||||
_style: tuple[int, ...] # _styles to apply to some elements of printed output
|
_style: tuple[int, ...] # _styles to apply to some elements of printed output
|
||||||
# memo: dict[str, str] = {} # Memoïze values
|
_storage_title_append: int = 0 # Append a number to storage title if > 0
|
||||||
|
|
||||||
def dest_directory(self) -> str:
|
|
||||||
raise NotImplementedError("Subclasses need to implement directory()")
|
|
||||||
|
|
||||||
def dest_filename(self, prepend: str = "", append: str = "") -> str:
|
|
||||||
raise NotImplementedError(
|
|
||||||
f"Subclasses need to implement dest_filename(), params:{prepend}{append}"
|
|
||||||
)
|
|
||||||
|
|
||||||
def dest_path(self) -> str:
|
|
||||||
return self.dest_directory() + self.dest_filename()
|
|
||||||
|
|
||||||
|
|
||||||
class WritableObject(SpipInterface):
|
|
||||||
# Apply a mapping from regex maps
|
# Apply a mapping from regex maps
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def apply_mapping(text: str, mapping: tuple) -> str:
|
def apply_mapping(text: str, mapping: tuple) -> str:
|
||||||
@ -162,10 +144,21 @@ class WritableObject(SpipInterface):
|
|||||||
|
|
||||||
# Apply post-init conversions and cancel the export if self not of the right lang
|
# Apply post-init conversions and cancel the export if self not of the right lang
|
||||||
def convert(self) -> None:
|
def convert(self) -> None:
|
||||||
self._title = self.convert_field(self.titre)
|
self._storage_title = self.convert_field(self.titre)
|
||||||
if not CFG.export_drafts and self._draft:
|
if not CFG.export_drafts and self._draft:
|
||||||
raise DontExportDraftError(f"{self.titre} is a draft, cancelling export")
|
raise DontExportDraftError(f"{self.titre} is a draft, cancelling export")
|
||||||
|
|
||||||
|
def dest_directory(self) -> str:
|
||||||
|
raise NotImplementedError("Subclasses need to implement directory()")
|
||||||
|
|
||||||
|
def dest_filename(self, prepend: str = "", append: str = "") -> str:
|
||||||
|
raise NotImplementedError(
|
||||||
|
f"Subclasses need to implement dest_filename(), params:{prepend}{append}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def dest_path(self) -> str:
|
||||||
|
return self.dest_directory() + self.dest_filename()
|
||||||
|
|
||||||
# Print one or more line(s) in which special elements are stylized
|
# Print one or more line(s) in which special elements are stylized
|
||||||
def style_print(
|
def style_print(
|
||||||
self, string: str, indent: Optional[str] = " ", end: str = "\n"
|
self, string: str, indent: Optional[str] = " ", end: str = "\n"
|
||||||
@ -194,10 +187,10 @@ class WritableObject(SpipInterface):
|
|||||||
self.style_print(counter)
|
self.style_print(counter)
|
||||||
# Output the counter & title of the object being exported
|
# Output the counter & title of the object being exported
|
||||||
msg: str = f"{index + 1}. "
|
msg: str = f"{index + 1}. "
|
||||||
if len(self._title) == 0:
|
if len(self._storage_title) == 0:
|
||||||
msg += "EMPTY NAME"
|
msg += "EMPTY NAME"
|
||||||
else:
|
else:
|
||||||
msg += self._title
|
msg += self._storage_title
|
||||||
# Print the output as the program goes
|
# Print the output as the program goes
|
||||||
# LOG.debug(f"Begin exporting {type(self).__name__} {output[-1]}")
|
# LOG.debug(f"Begin exporting {type(self).__name__} {output[-1]}")
|
||||||
self.style_print(msg, end="")
|
self.style_print(msg, end="")
|
||||||
@ -225,20 +218,24 @@ class WritableObject(SpipInterface):
|
|||||||
def write_all(
|
def write_all(
|
||||||
self,
|
self,
|
||||||
parentdepth: int,
|
parentdepth: int,
|
||||||
parentdir: str,
|
storage_parentdir: str,
|
||||||
index: int,
|
index: int,
|
||||||
total: int,
|
total: int,
|
||||||
storage_parentdir: Optional[str] = None,
|
parenturl: str,
|
||||||
) -> str:
|
) -> str:
|
||||||
LOG.debug(f"Writing {type(self).__name__} `{self._title}`")
|
LOG.debug(f"Writing {type(self).__name__} `{self._storage_title}`")
|
||||||
self._depth = parentdepth + 1
|
self._depth = parentdepth + 1
|
||||||
self._parentdir = parentdir
|
self._storage_parentdir = storage_parentdir
|
||||||
if storage_parentdir:
|
self._parenturl = parenturl
|
||||||
self._storage_parentdir = storage_parentdir
|
|
||||||
output: str = self.begin_message(index, total)
|
output: str = self.begin_message(index, total)
|
||||||
try:
|
try:
|
||||||
output += self.end_message(self.write())
|
output += self.end_message(self.write())
|
||||||
except Exception as err:
|
except (
|
||||||
|
LangNotFoundError,
|
||||||
|
DontExportDraftError,
|
||||||
|
IgnoredPatternError,
|
||||||
|
FileNotFoundError,
|
||||||
|
) as err:
|
||||||
output += self.end_message(err)
|
output += self.end_message(err)
|
||||||
return output
|
return output
|
||||||
|
|
||||||
@ -264,9 +261,9 @@ class Document(WritableObject, SpipDocuments):
|
|||||||
def dest_directory(self, prepend: str = "", append: str = "") -> str:
|
def dest_directory(self, prepend: str = "", append: str = "") -> str:
|
||||||
_id: str = str(self._id) + "-" if CFG.prepend_id else ""
|
_id: str = str(self._id) + "-" if CFG.prepend_id else ""
|
||||||
return (
|
return (
|
||||||
self._parentdir
|
self._storage_parentdir
|
||||||
+ prepend
|
+ prepend
|
||||||
+ slugify(_id + self._title, max_length=100)
|
+ slugify(_id + self._storage_title, max_length=100)
|
||||||
+ append
|
+ append
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -284,15 +281,21 @@ class Document(WritableObject, SpipDocuments):
|
|||||||
def write_all(
|
def write_all(
|
||||||
self,
|
self,
|
||||||
parentdepth: int,
|
parentdepth: int,
|
||||||
parentdir: str,
|
storage_parentdir: str,
|
||||||
index: int,
|
index: int,
|
||||||
total: int,
|
total: int,
|
||||||
forcedlang: str,
|
forcedlang: Optional[str] = None,
|
||||||
storage_parentdir: Optional[str],
|
parenturl: str = "",
|
||||||
) -> str:
|
) -> str:
|
||||||
self.convert() # Apply post-init conversions
|
self.convert() # Apply post-init conversions
|
||||||
|
LOG.debug(
|
||||||
|
f"Document {self._storage_title} doesn’t care about forcedlang {forcedlang}"
|
||||||
|
)
|
||||||
|
LOG.debug(
|
||||||
|
f"Document {self._storage_title} doesn’t care about parenturl {parenturl}"
|
||||||
|
)
|
||||||
return super().write_all(
|
return super().write_all(
|
||||||
parentdepth, parentdir, index, total, storage_parentdir
|
parentdepth, storage_parentdir, index, total, parenturl
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -319,10 +322,14 @@ class RedactionalObject(WritableObject):
|
|||||||
langue_choisie: str
|
langue_choisie: str
|
||||||
# Converted
|
# Converted
|
||||||
_text: str
|
_text: str
|
||||||
|
_url_title: str # Title in metadata of articles
|
||||||
|
_parenturl: str # URL relative to lang to direct parent
|
||||||
|
|
||||||
# Get rid of other lang than forced in text and modify lang to forced if found
|
# Get rid of other lang than forced in text and modify lang to forced if found
|
||||||
def translate_multi(self, forced_lang: str, text: str) -> str:
|
def translate_multi(
|
||||||
LOG.debug(f"Translating <multi> blocks of `{self._title}`")
|
self, forced_lang: str, text: str, change_lang: bool = True
|
||||||
|
) -> str:
|
||||||
|
# LOG.debug(f"Translating <multi> blocks of `{self._url_title}`")
|
||||||
# for each <multi> blocks, keep only forced lang
|
# for each <multi> blocks, keep only forced lang
|
||||||
lang: Optional[Match[str]] = None
|
lang: Optional[Match[str]] = None
|
||||||
for block in MULTILANG_BLOCK.finditer(text):
|
for block in MULTILANG_BLOCK.finditer(text):
|
||||||
@ -331,16 +338,17 @@ class RedactionalObject(WritableObject):
|
|||||||
# Log the translation
|
# Log the translation
|
||||||
trans: str = lang.group(1)[:50].strip()
|
trans: str = lang.group(1)[:50].strip()
|
||||||
LOG.debug(
|
LOG.debug(
|
||||||
f"Keeping {forced_lang} translation of `{self._title}`: "
|
f"Keeping {forced_lang} translation of `{self._url_title}`: "
|
||||||
+ f"`{trans}`, becoming its new lang"
|
+ f"`{trans}`"
|
||||||
)
|
)
|
||||||
self.lang = forced_lang # So write-all will not be cancelled
|
if change_lang:
|
||||||
if self.id_trad == 0: # Assign translation key to id so hugo can link
|
self.lang = forced_lang # So write-all will not be cancelled
|
||||||
self.id_trad = self._id
|
if self.id_trad == 0: # Assign translation key to id for Hugo
|
||||||
|
self.id_trad = self._id
|
||||||
# Replace the mutli blocks with the text in the proper lang
|
# Replace the mutli blocks with the text in the proper lang
|
||||||
text = text.replace(block.group(), lang.group(1))
|
text = text.replace(block.group(), lang.group(1))
|
||||||
if lang is None:
|
if lang is None:
|
||||||
LOG.debug(f"{forced_lang} not found in `{self._title}`")
|
LOG.debug(f"{forced_lang} not found in `{self._url_title}`")
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def replace_links(self, text: str) -> str:
|
def replace_links(self, text: str) -> str:
|
||||||
@ -391,7 +399,7 @@ class RedactionalObject(WritableObject):
|
|||||||
for link, getobj, repl in LinkMappings():
|
for link, getobj, repl in LinkMappings():
|
||||||
# LOG.debug(f"Looking for {link} in {text}")
|
# LOG.debug(f"Looking for {link} in {text}")
|
||||||
for m in link.finditer(text):
|
for m in link.finditer(text):
|
||||||
LOG.debug(f"Found internal link {m.group()} in {self._title}")
|
LOG.debug(f"Found internal link {m.group()} in {self._url_title}")
|
||||||
try:
|
try:
|
||||||
LOG.debug(f"Searching for object of id {m.group(2)} with {getobj}")
|
LOG.debug(f"Searching for object of id {m.group(2)} with {getobj}")
|
||||||
o: "Document | Article | Section" = getobj(int(m.group(2)))
|
o: "Document | Article | Section" = getobj(int(m.group(2)))
|
||||||
@ -406,54 +414,45 @@ class RedactionalObject(WritableObject):
|
|||||||
print(repl, m.group(1), o.dest_filename())
|
print(repl, m.group(1), o.dest_filename())
|
||||||
raise err
|
raise err
|
||||||
else:
|
else:
|
||||||
repl = repl.format(o._title, o.dest_filename())
|
repl = repl.format(o._storage_title, o.dest_filename())
|
||||||
LOG.debug(f"Translate link {m.group()} to {repl} in {self._title}")
|
LOG.debug(
|
||||||
|
f"Translate link {m.group()} to {repl} in {self._url_title}"
|
||||||
|
)
|
||||||
text = text.replace(m.group(), repl)
|
text = text.replace(m.group(), repl)
|
||||||
except DoesNotExist:
|
except DoesNotExist:
|
||||||
LOG.warn(f"No object for link {m.group()} in {self._title}")
|
LOG.warn(f"No object for link {m.group()} in {self._url_title}")
|
||||||
text = text.replace(m.group(), repl.format("", "NOT FOUND"), 1)
|
text = text.replace(m.group(), repl.format("", "NOT FOUND"), 1)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
# Modify this object’s title to prevent filename conflicts
|
# Get this object url, or none if it’s the same as directory
|
||||||
def conflict_title(self, conflict: str) -> None:
|
def url(self) -> str:
|
||||||
if CFG.conflict_strategy == "prepend id":
|
_id: str = str(self._id) + "-" if CFG.prepend_id else ""
|
||||||
title: str = str(self._id) + "_" + self._title
|
counter: str = (
|
||||||
elif CFG.conflict_strategy == "append id":
|
"_" + str(self._storage_title_append)
|
||||||
title: str = self._title + "_" + str(self._id)
|
if self._storage_title_append > 0
|
||||||
elif CFG.conflict_strategy == "prepend counter":
|
else ""
|
||||||
m = match(r"([0-9]+)_" + self._title, conflict)
|
)
|
||||||
if m is not None:
|
# Return none if url will be the same as directory
|
||||||
title: str = str(int(m.group(1)) + 1) + "_" + self._title
|
return (
|
||||||
else:
|
self._parenturl
|
||||||
title: str = "1_" + self._title
|
+ slugify(_id + self._url_title, max_length=CFG.title_max_length)
|
||||||
else: # Defaults to append counter
|
+ counter
|
||||||
m = match(self._title + r"_([0-9]+)$", conflict)
|
+ r"/"
|
||||||
if m is not None:
|
)
|
||||||
title: str = self._title + "_" + str(int(m.group(1)) + 1)
|
|
||||||
else:
|
|
||||||
title: str = self._title + "_1"
|
|
||||||
LOG.debug(f"Rewriting {self._title} title to {title}")
|
|
||||||
self._title = title
|
|
||||||
|
|
||||||
# Get slugified directory of this object
|
# Get slugified directory of this object
|
||||||
def dest_directory(self) -> str:
|
def dest_directory(self) -> str:
|
||||||
_id: str = str(self._id) + "-" if CFG.prepend_id else ""
|
_id: str = str(self._id) + "-" if CFG.prepend_id else ""
|
||||||
slug: str = slugify(_id + self._title, max_length=100)
|
counter: str = (
|
||||||
directory: str = self._parentdir + slug
|
"_" + str(self._storage_title_append)
|
||||||
if self._storage_title is not None or self._storage_parentdir is not None:
|
if self._storage_title_append > 0
|
||||||
self._url = directory
|
else ""
|
||||||
directory: str = (
|
)
|
||||||
self._storage_parentdir
|
directory: str = self._storage_parentdir + slugify(
|
||||||
if self._storage_parentdir is not None
|
_id + self._storage_title,
|
||||||
else self._parentdir
|
max_length=CFG.title_max_length,
|
||||||
+ slugify(
|
)
|
||||||
_id + self._storage_title
|
return directory + counter + r"/"
|
||||||
if self._storage_title is not None
|
|
||||||
else self._title,
|
|
||||||
max_length=100,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
return directory + r"/"
|
|
||||||
|
|
||||||
# Get filename of this object
|
# Get filename of this object
|
||||||
def dest_filename(self) -> str:
|
def dest_filename(self) -> str:
|
||||||
@ -462,64 +461,84 @@ class RedactionalObject(WritableObject):
|
|||||||
def convert_title(self, forced_lang: str) -> None:
|
def convert_title(self, forced_lang: str) -> None:
|
||||||
LOG.debug(f"Convert title of currently untitled {type(self).__name__}")
|
LOG.debug(f"Convert title of currently untitled {type(self).__name__}")
|
||||||
if hasattr(self, "_title"):
|
if hasattr(self, "_title"):
|
||||||
LOG.debug(f"{type(self).__name__} {self._title} _title is already set")
|
LOG.debug(f"{type(self).__name__} {self._url_title} _title is already set")
|
||||||
return
|
return
|
||||||
if self.titre is None:
|
if self.titre is None:
|
||||||
LOG.debug(f"{type(self).__name__} title is None")
|
LOG.debug(f"{type(self).__name__} title is None")
|
||||||
self._title = ""
|
self._url_title = ""
|
||||||
return
|
return
|
||||||
if len(self.titre) == 0:
|
if len(self.titre) == 0:
|
||||||
LOG.debug(f"{type(self).__name__} title is empty")
|
LOG.debug(f"{type(self).__name__} title is empty")
|
||||||
self._title = ""
|
self._url_title = ""
|
||||||
return
|
return
|
||||||
self._title = self.titre.strip()
|
self._url_title = self.titre.strip()
|
||||||
# Keep storage language title to store it
|
# Set storage title to language of storage lang if different
|
||||||
if CFG.storage_language is not None and CFG.storage_language != forced_lang:
|
storage_lang: str = (
|
||||||
self._storage_title = self.translate_multi(
|
CFG.storage_language if CFG.storage_language is not None else forced_lang
|
||||||
CFG.storage_language, self._title
|
)
|
||||||
)
|
LOG.debug(
|
||||||
self._storage_title = self.convert_field(self._storage_title)
|
f"Searching for {storage_lang} in <multi> blocks of `{self._url_title}`"
|
||||||
self._title = self.translate_multi(forced_lang, self._title)
|
+ " storage title"
|
||||||
LOG.debug(f"Convert internal links of {self.lang} `{self._title}` title")
|
)
|
||||||
self._title = self.replace_links(self._title)
|
self._storage_title = self.translate_multi(
|
||||||
LOG.debug(f"Apply conversions to {self.lang} `{self._title}` title")
|
storage_lang,
|
||||||
self._title = self.convert_field(self._title)
|
self._url_title,
|
||||||
|
False,
|
||||||
|
)
|
||||||
|
LOG.debug(
|
||||||
|
f"Searching for {forced_lang} in <multi> blocks of `{self._url_title}`"
|
||||||
|
+ " URL title"
|
||||||
|
)
|
||||||
|
self._url_title = self.translate_multi(forced_lang, self._url_title)
|
||||||
|
LOG.debug(f"Convert internal links of {self.lang} `{self._url_title}` title")
|
||||||
|
self._storage_title = self.replace_links(self._storage_title)
|
||||||
|
self._url_title = self.replace_links(self._url_title)
|
||||||
|
LOG.debug(f"Apply conversions to {self.lang} `{self._url_title}` title")
|
||||||
|
self._storage_title = self.convert_field(self._storage_title)
|
||||||
|
self._url_title = self.convert_field(self._url_title)
|
||||||
|
for p in CFG.ignore_pattern:
|
||||||
|
for title in (self._storage_title, self._url_title):
|
||||||
|
m = match(p, title, I)
|
||||||
|
if m is not None:
|
||||||
|
raise IgnoredPatternError(
|
||||||
|
f"{self._url_title} matches with ignore pattern {p}, ignoring"
|
||||||
|
)
|
||||||
|
|
||||||
def convert_text(self, forced_lang: str) -> None:
|
def convert_text(self, forced_lang: str) -> None:
|
||||||
LOG.debug(f"Convert text of `{self._title}`")
|
LOG.debug(f"Convert text of `{self._url_title}`")
|
||||||
if hasattr(self, "_text"):
|
if hasattr(self, "_text"):
|
||||||
LOG.debug(f"{type(self).__name__} {self._title} _text is already set")
|
LOG.debug(f"{type(self).__name__} {self._url_title} _text is already set")
|
||||||
return
|
return
|
||||||
if self.texte is None:
|
if self.texte is None:
|
||||||
LOG.debug(f"{type(self).__name__} {self._title} text is None")
|
LOG.debug(f"{type(self).__name__} {self._url_title} text is None")
|
||||||
self._text = ""
|
self._text = ""
|
||||||
return
|
return
|
||||||
if len(self.texte) == 0:
|
if len(self.texte) == 0:
|
||||||
LOG.debug(f"{type(self).__name__} {self._title} text is empty")
|
LOG.debug(f"{type(self).__name__} {self._url_title} text is empty")
|
||||||
self._text = ""
|
self._text = ""
|
||||||
return
|
return
|
||||||
self._text = self.translate_multi(forced_lang, self.texte.strip())
|
self._text = self.translate_multi(forced_lang, self.texte.strip())
|
||||||
LOG.debug(f"Convert internal links of {self.lang} `{self._title}` text")
|
LOG.debug(f"Convert internal links of {self.lang} `{self._url_title}` text")
|
||||||
self._text = self.replace_links(self._text)
|
self._text = self.replace_links(self._text)
|
||||||
LOG.debug(f"Apply conversions to {self.lang} `{self._title}` text")
|
LOG.debug(f"Apply conversions to {self.lang} `{self._url_title}` text")
|
||||||
self._text = self.convert_field(self._text)
|
self._text = self.convert_field(self._text)
|
||||||
|
|
||||||
def convert_extra(self) -> None:
|
def convert_extra(self) -> None:
|
||||||
LOG.debug(f"Convert extra of `{self._title}`")
|
LOG.debug(f"Convert extra of `{self._url_title}`")
|
||||||
if hasattr(self, "_extra"):
|
if hasattr(self, "_extra"):
|
||||||
LOG.debug(f"{type(self).__name__} {self._title} _extra is already set")
|
LOG.debug(f"{type(self).__name__} {self._url_title} _extra is already set")
|
||||||
return
|
return
|
||||||
if self.extra is None:
|
if self.extra is None:
|
||||||
LOG.debug(f"{type(self).__name__} {self._title} extra is None")
|
LOG.debug(f"{type(self).__name__} {self._url_title} extra is None")
|
||||||
self._extra = ""
|
self._extra = ""
|
||||||
return
|
return
|
||||||
if len(self.extra) == 0:
|
if len(self.extra) == 0:
|
||||||
LOG.debug(f"{type(self).__name__} {self._title} extra is empty")
|
LOG.debug(f"{type(self).__name__} {self._url_title} extra is empty")
|
||||||
self._extra = ""
|
self._extra = ""
|
||||||
return
|
return
|
||||||
LOG.debug(f"Convert internal links of {self.lang} `{self._title}` extra")
|
LOG.debug(f"Convert internal links of {self.lang} `{self._url_title}` extra")
|
||||||
self._extra = self.replace_links(self._extra)
|
self._extra = self.replace_links(self._extra)
|
||||||
LOG.debug(f"Apply conversions to {self.lang} `{self._title}` extra")
|
LOG.debug(f"Apply conversions to {self.lang} `{self._url_title}` extra")
|
||||||
self._extra = self.convert_field(self._extra)
|
self._extra = self.convert_field(self._extra)
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
@ -529,7 +548,7 @@ class RedactionalObject(WritableObject):
|
|||||||
|
|
||||||
# Get related documents
|
# Get related documents
|
||||||
def documents(self) -> tuple[Document]:
|
def documents(self) -> tuple[Document]:
|
||||||
LOG.debug(f"Initialize documents of `{self._title}`")
|
LOG.debug(f"Initialize documents of `{self._url_title}`")
|
||||||
documents = (
|
documents = (
|
||||||
Document.select()
|
Document.select()
|
||||||
.join(
|
.join(
|
||||||
@ -546,7 +565,7 @@ class RedactionalObject(WritableObject):
|
|||||||
meta: dict[str, Any] = {
|
meta: dict[str, Any] = {
|
||||||
"lang": self.lang,
|
"lang": self.lang,
|
||||||
"translationKey": self.id_trad,
|
"translationKey": self.id_trad,
|
||||||
"title": self._title,
|
"title": self._url_title,
|
||||||
"publishDate": self.date,
|
"publishDate": self.date,
|
||||||
"lastmod": self.maj,
|
"lastmod": self.maj,
|
||||||
"draft": self._draft,
|
"draft": self._draft,
|
||||||
@ -555,8 +574,9 @@ class RedactionalObject(WritableObject):
|
|||||||
"spip_id_secteur": self.id_secteur,
|
"spip_id_secteur": self.id_secteur,
|
||||||
"spip_id": self._id,
|
"spip_id": self._id,
|
||||||
}
|
}
|
||||||
if self._url is not None:
|
# Add url if different of directory
|
||||||
meta = meta | {"url": self._url}
|
if self.url() not in self.dest_directory():
|
||||||
|
meta = meta | {"url": self.url()}
|
||||||
if append is not None:
|
if append is not None:
|
||||||
return dump(meta | append, allow_unicode=True)
|
return dump(meta | append, allow_unicode=True)
|
||||||
else:
|
else:
|
||||||
@ -568,8 +588,8 @@ class RedactionalObject(WritableObject):
|
|||||||
# Start the content with frontmatter
|
# Start the content with frontmatter
|
||||||
body: str = "---\n" + self.frontmatter() + "---"
|
body: str = "---\n" + self.frontmatter() + "---"
|
||||||
# Add the title as a Markdown h1
|
# Add the title as a Markdown h1
|
||||||
if len(self._title) > 0 and CFG.prepend_h1:
|
if self._url_title is not None and len(self._url_title) > 0 and CFG.prepend_h1:
|
||||||
body += "\n\n# " + self._title
|
body += "\n\n# " + self._url_title
|
||||||
# If there is a text, add the text preceded by two line breaks
|
# If there is a text, add the text preceded by two line breaks
|
||||||
if len(self._text) > 0:
|
if len(self._text) > 0:
|
||||||
# Remove remaining HTML after & append to body
|
# Remove remaining HTML after & append to body
|
||||||
@ -584,9 +604,8 @@ class RedactionalObject(WritableObject):
|
|||||||
self,
|
self,
|
||||||
children: tuple[Document] | tuple[Any],
|
children: tuple[Document] | tuple[Any],
|
||||||
forcedlang: str,
|
forcedlang: str,
|
||||||
storage_parentdir: Optional[str] = None,
|
|
||||||
) -> list[str]:
|
) -> list[str]:
|
||||||
LOG.debug(f"Writing documents of {type(self).__name__} `{self._title}`")
|
LOG.debug(f"Writing documents of {type(self).__name__} `{self._url_title}`")
|
||||||
output: list[str] = []
|
output: list[str] = []
|
||||||
total = len(children)
|
total = len(children)
|
||||||
i = 0
|
i = 0
|
||||||
@ -599,40 +618,54 @@ class RedactionalObject(WritableObject):
|
|||||||
i,
|
i,
|
||||||
total,
|
total,
|
||||||
forcedlang,
|
forcedlang,
|
||||||
storage_parentdir,
|
self.url(),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
i += 1
|
i += 1
|
||||||
except LangNotFoundError as err:
|
except (
|
||||||
LOG.debug(err)
|
LangNotFoundError,
|
||||||
except DontExportDraftError as err:
|
DontExportDraftError,
|
||||||
LOG.debug(err)
|
IgnoredPatternError,
|
||||||
except IgnoredPatternError as err:
|
) as err:
|
||||||
LOG.debug(err)
|
LOG.debug(err)
|
||||||
return output
|
return output
|
||||||
|
|
||||||
# Write object to output destination
|
# Write object to output destination
|
||||||
def write(self) -> str:
|
def write(self) -> str:
|
||||||
# Make a directory for this object if there isn’t
|
# Make a directory for this object if there isn’t
|
||||||
directory: str = self.dest_directory()
|
# If it cannot for incompatibility, try until it can
|
||||||
try:
|
incompatible: bool = True
|
||||||
makedirs(directory)
|
while incompatible:
|
||||||
except FileExistsError:
|
directory: str = self.dest_directory()
|
||||||
# Create a new directory if write is about to overwrite an existing file
|
try:
|
||||||
# or to write into a directory without the same fileprefix
|
mkdir(directory)
|
||||||
for file in listdir(directory):
|
break
|
||||||
LOG.debug(
|
except FileExistsError:
|
||||||
f"Testing if {type(self).__name__} `{self.dest_path()}` of prefix "
|
# If not stated incompatible with the following, will write in this dir
|
||||||
+ f"{self._fileprefix} can be written along with `{file}` "
|
incompatible = False
|
||||||
+ f"of prefix `{file.split('.')[0]}` in `{self.dest_directory()}`"
|
# Create a new directory if write is about to overwrite an existing file
|
||||||
)
|
# or to write into a directory without the same fileprefix
|
||||||
if isfile(directory + file) and (
|
for file in listdir(directory):
|
||||||
directory + file == self.dest_path()
|
if isfile(directory + file):
|
||||||
or file.split(".")[0] != self._fileprefix
|
LOG.debug(
|
||||||
):
|
f"Can {type(self).__name__} `{self.dest_path()}` of prefix "
|
||||||
self.conflict_title(directory.split("/")[-1])
|
+ f"{self._fileprefix} and suffix {CFG.export_filetype}"
|
||||||
makedirs(self.dest_directory())
|
+ f" be written along with `{file}` of prefix "
|
||||||
break
|
+ f"`{file.split('.')[0]}` and suffix {file.split('.')[-1]}"
|
||||||
|
+ f"` in {self.dest_directory()}` ?"
|
||||||
|
)
|
||||||
|
# Resolve conflict at first incompatible file encountered
|
||||||
|
if directory + file == self.dest_path() or (
|
||||||
|
file.split(".")[-1] == CFG.export_filetype
|
||||||
|
and file.split(".")[0] != self._fileprefix
|
||||||
|
):
|
||||||
|
LOG.debug(
|
||||||
|
f"No, incrementing counter of {self.dest_directory()}"
|
||||||
|
)
|
||||||
|
self._storage_title_append += 1
|
||||||
|
incompatible = True
|
||||||
|
break
|
||||||
|
|
||||||
# Write the content of this object into a file named as self.filename()
|
# Write the content of this object into a file named as self.filename()
|
||||||
with open(self.dest_path(), "w") as f:
|
with open(self.dest_path(), "w") as f:
|
||||||
f.write(self.content())
|
f.write(self.content())
|
||||||
@ -641,17 +674,11 @@ class RedactionalObject(WritableObject):
|
|||||||
# Apply post-init conversions and cancel the export if self not of the right lang
|
# Apply post-init conversions and cancel the export if self not of the right lang
|
||||||
def convert(self, forced_lang: str) -> None:
|
def convert(self, forced_lang: str) -> None:
|
||||||
self.convert_title(forced_lang)
|
self.convert_title(forced_lang)
|
||||||
for p in CFG.ignore_pattern:
|
|
||||||
m = match(p, self._title, I)
|
|
||||||
if m is not None:
|
|
||||||
raise IgnoredPatternError(
|
|
||||||
f"{self._title} is matching with ignore pattern {p}, ignoring"
|
|
||||||
)
|
|
||||||
self.convert_text(forced_lang)
|
self.convert_text(forced_lang)
|
||||||
self.convert_extra()
|
self.convert_extra()
|
||||||
if self.lang != forced_lang:
|
if self.lang != forced_lang:
|
||||||
raise LangNotFoundError(
|
raise LangNotFoundError(
|
||||||
f"`{self._title}` lang is {self.lang} instead of the wanted"
|
f"`{self._url_title}` lang is {self.lang} instead of the wanted"
|
||||||
+ f" {forced_lang} and it don’t contains"
|
+ f" {forced_lang} and it don’t contains"
|
||||||
+ f" {forced_lang} translation in Markup either"
|
+ f" {forced_lang} translation in Markup either"
|
||||||
)
|
)
|
||||||
@ -705,7 +732,7 @@ class Article(RedactionalObject, SpipArticles):
|
|||||||
return body
|
return body
|
||||||
|
|
||||||
def authors(self) -> list[SpipAuteurs]:
|
def authors(self) -> list[SpipAuteurs]:
|
||||||
LOG.debug(f"Initialize authors of `{self._title}`")
|
LOG.debug(f"Initialize authors of `{self._url_title}`")
|
||||||
return (
|
return (
|
||||||
SpipAuteurs.select()
|
SpipAuteurs.select()
|
||||||
.join(
|
.join(
|
||||||
@ -719,20 +746,18 @@ class Article(RedactionalObject, SpipArticles):
|
|||||||
def write_all(
|
def write_all(
|
||||||
self,
|
self,
|
||||||
parentdepth: int,
|
parentdepth: int,
|
||||||
parentdir: str,
|
storage_parentdir: str,
|
||||||
index: int,
|
index: int,
|
||||||
total: int,
|
total: int,
|
||||||
forced_lang: str,
|
forced_lang: str,
|
||||||
storage_parentdir: Optional[str] = None,
|
parenturl: str,
|
||||||
) -> DeepDict:
|
) -> DeepDict:
|
||||||
self.convert(forced_lang)
|
self.convert(forced_lang)
|
||||||
return {
|
return {
|
||||||
"msg": super().write_all(
|
"msg": super().write_all(
|
||||||
parentdepth, parentdir, index, total, storage_parentdir
|
parentdepth, storage_parentdir, index, total, parenturl
|
||||||
),
|
|
||||||
"documents": self.write_children(
|
|
||||||
self.documents(), forced_lang, storage_parentdir
|
|
||||||
),
|
),
|
||||||
|
"documents": self.write_children(self.documents(), forced_lang),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -756,7 +781,7 @@ class Section(RedactionalObject, SpipRubriques):
|
|||||||
|
|
||||||
# Get articles of this section
|
# Get articles of this section
|
||||||
def articles(self, limit: int = 10**6) -> tuple[Article]:
|
def articles(self, limit: int = 10**6) -> tuple[Article]:
|
||||||
LOG.debug(f"Initialize articles of `{self._title}`")
|
LOG.debug(f"Initialize articles of `{self._url_title}`")
|
||||||
return (
|
return (
|
||||||
Article.select()
|
Article.select()
|
||||||
.where(Article.id_rubrique == self._id)
|
.where(Article.id_rubrique == self._id)
|
||||||
@ -766,7 +791,7 @@ class Section(RedactionalObject, SpipRubriques):
|
|||||||
|
|
||||||
# Get subsections of this section
|
# Get subsections of this section
|
||||||
def sections(self, limit: int = 10**6) -> tuple[Self]:
|
def sections(self, limit: int = 10**6) -> tuple[Self]:
|
||||||
LOG.debug(f"Initialize subsections of `{self._title}`")
|
LOG.debug(f"Initialize subsections of `{self._url_title}`")
|
||||||
return (
|
return (
|
||||||
Section.select()
|
Section.select()
|
||||||
.where(Section.id_parent == self._id)
|
.where(Section.id_parent == self._id)
|
||||||
@ -783,24 +808,18 @@ class Section(RedactionalObject, SpipRubriques):
|
|||||||
def write_all(
|
def write_all(
|
||||||
self,
|
self,
|
||||||
parentdepth: int,
|
parentdepth: int,
|
||||||
parentdir: str,
|
storage_parentdir: str,
|
||||||
index: int,
|
index: int,
|
||||||
total: int,
|
total: int,
|
||||||
forced_lang: str,
|
forced_lang: str,
|
||||||
storage_parentdir: Optional[str] = None,
|
parenturl: str = "",
|
||||||
) -> DeepDict:
|
) -> DeepDict:
|
||||||
self.convert(forced_lang)
|
self.convert(forced_lang)
|
||||||
return {
|
return {
|
||||||
"msg": super().write_all(
|
"msg": super().write_all(
|
||||||
parentdepth, parentdir, index, total, storage_parentdir
|
parentdepth, storage_parentdir, index, total, parenturl
|
||||||
),
|
|
||||||
"documents": self.write_children(
|
|
||||||
self.documents(), forced_lang, storage_parentdir
|
|
||||||
),
|
|
||||||
"articles": self.write_children(
|
|
||||||
self.articles(), forced_lang, storage_parentdir
|
|
||||||
),
|
|
||||||
"sections": self.write_children(
|
|
||||||
self.sections(), forced_lang, storage_parentdir
|
|
||||||
),
|
),
|
||||||
|
"documents": self.write_children(self.documents(), forced_lang),
|
||||||
|
"articles": self.write_children(self.articles(), forced_lang),
|
||||||
|
"sections": self.write_children(self.sections(), forced_lang),
|
||||||
}
|
}
|
||||||
|
@ -58,7 +58,9 @@ as database user {esc(BOLD)}{CFG.db_user}{esc()}
|
|||||||
except IgnoredPatternError as err:
|
except IgnoredPatternError as err:
|
||||||
ROOTLOG.debug(err) # Log the message
|
ROOTLOG.debug(err) # Log the message
|
||||||
print() # Break line between level 0 sections in output
|
print() # Break line between level 0 sections in output
|
||||||
ROOTLOG.debug(f"Finished exporting {lang} root section {i}/{nb} {s._title}")
|
ROOTLOG.debug(
|
||||||
|
f"Finished exporting {lang} root section {i}/{nb} {s._url_title}"
|
||||||
|
)
|
||||||
return {"sections": buffer}
|
return {"sections": buffer}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user