feat: settings to ignore empty objects or objects whose titles match a regex

This commit is contained in:
Guilhem Fauré 2023-06-16 16:09:46 +02:00
parent 9aa81efa60
commit e1cd926078
3 changed files with 28 additions and 7 deletions

View File

@ -71,30 +71,38 @@ program with a `spip2md.yml` file in your working directory.
Here are the *default configuration options* with comments explaining their meaning: Here are the *default configuration options* with comments explaining their meaning:
```yaml ```yaml
# Data source settings
db: spip # Name of the database db: spip # Name of the database
db_host: localhost # Host of the database db_host: localhost # Host of the database
db_user: spip # The database user db_user: spip # The database user
db_pass: password # The database password db_pass: password # The database password
data_dir: data # The directory in which SPIP images & files are stored data_dir: data # The directory in which SPIP images & files are stored
# Data destination settings
export_languages: ["en"] # Array of languages to export, two letter lang code export_languages: ["en"] # Array of languages to export, two letter lang code
# If set, directories will be created only for this language, according to this # If set, directories will be created only for this language, according to this
# languages titles. Other languages will be written along with correct url: attribute # languages titles. Other languages will be written along with correct url: attribute
storage_language: null storage_language: null
output_dir: output/ # The directory in which files will be written output_dir: output/ # The directory in which files will be written
# Destination directories names settings
prepend_h1: false # Add title of articles as Markdown h1, looks better on certain themes prepend_h1: false # Add title of articles as Markdown h1, looks better on certain themes
# Prepend ID to directory slug, preventing collisions # Prepend ID to directory slug, preventing collisions
# If false, a counter will be appended in case of name collision # If false, a counter will be appended in case of name collision
prepend_id: false prepend_id: false
prepend_lang: false # Prepend lang of the object to directory slug (prevents collision) prepend_lang: false # Prepend lang of the object to directory slug (prevents collision)
export_drafts: true # Should we export drafts
remove_html: true # Should we clean remaining HTML blocks
title_max_length: 40 # Maximum length of a single filename title_max_length: 40 # Maximum length of a single filename
# Ignored data settings
export_drafts: true # Should we export drafts
export_empty: true # Should we export empty articles
ignore_patterns: [] # List of regexes: Matching sections or articles will be ignored
# Text body processing settings
remove_html: true # Should we clean remaining HTML blocks
unknown_char_replacement: ?? # String to replace broken encoding that cannot be repaired unknown_char_replacement: ?? # String to replace broken encoding that cannot be repaired
# You probably don't want to modify the settings below # Settings you probably don't want to modify
clear_log: true # Clear logfile between runs instead of appending to clear_log: true # Clear logfile between runs instead of appending to
clear_output: true # Clear output dir between runs instead of merging into clear_output: true # Clear output dir between runs instead of merging into

View File

@ -69,12 +69,13 @@ class Configuration:
prepend_id: bool = False # Add the ID of object before slug prepend_id: bool = False # Add the ID of object before slug
prepend_lang: bool = False # Add the lang of object before slug prepend_lang: bool = False # Add the lang of object before slug
export_drafts: bool = True # Should we export drafts as draft:true articles export_drafts: bool = True # Should we export drafts as draft:true articles
export_empty: bool = True # Should we export empty articles
remove_html: bool = True # Should spip2md remove every HTML tags remove_html: bool = True # Should spip2md remove every HTML tags
title_max_length: int = 40 # Maximum length of a single title for directory names title_max_length: int = 40 # Maximum length of a single title for directory names
unknown_char_replacement: str = "??" # Replaces unknown characters unknown_char_replacement: str = "??" # Replaces unknown characters
clear_log: bool = True # Clear log before every run instead of appending to clear_log: bool = True # Clear log before every run instead of appending to
clear_output: bool = True # Remove eventual output dir before running clear_output: bool = True # Remove eventual output dir before running
ignore_pattern: list[str] = [] # Ignore objects of which title match ignore_patterns: list[str] = [] # Ignore objects of which title match
logfile: str = "log-spip2md.log" # File where logs will be written, relative to wd logfile: str = "log-spip2md.log" # File where logs will be written, relative to wd
loglevel: str = "WARNING" # Minimum criticity of logs written in logfile loglevel: str = "WARNING" # Minimum criticity of logs written in logfile
export_filetype: str = "md" # Extension of exported text files export_filetype: str = "md" # Extension of exported text files

View File

@ -217,7 +217,11 @@ class SpipWritable:
if type(message) is FileNotFoundError: if type(message) is FileNotFoundError:
output += "ERROR: NOT FOUND: " output += "ERROR: NOT FOUND: "
elif type(message) is DoesNotExist: elif type(message) is DoesNotExist:
output += "ERROR: NO DESTINATION DIR " output += "ERROR: NO DESTINATION DIR: "
elif type(message) is DontExportDraftError:
output += "ERROR: NOT EXPORTING DRAFT: "
elif type(message) is DontExportEmptyError:
output += "ERROR: NOT EXPORTING EMPTY: "
elif type(message) is not str: elif type(message) is not str:
output += "ERROR: UNKNOWN: " output += "ERROR: UNKNOWN: "
# Print the output as the program goes # Print the output as the program goes
@ -244,6 +248,7 @@ class SpipWritable:
except ( except (
LangNotFoundError, LangNotFoundError,
DontExportDraftError, DontExportDraftError,
DontExportEmptyError,
IgnoredPatternError, IgnoredPatternError,
FileNotFoundError, FileNotFoundError,
) as err: ) as err:
@ -322,6 +327,10 @@ class DontExportDraftError(Exception):
pass pass
# Raised while building an object's body when its text is empty and
# CFG.export_empty is false; the writing code catches it alongside
# DontExportDraftError and IgnoredPatternError instead of exporting the object.
class DontExportEmptyError(Exception):
pass
class SpipRedactional(SpipWritable): class SpipRedactional(SpipWritable):
id_trad: BigIntegerField | BigAutoField | int id_trad: BigIntegerField | BigAutoField | int
id_rubrique: BigAutoField | int id_rubrique: BigAutoField | int
@ -502,7 +511,7 @@ class SpipRedactional(SpipWritable):
LOG.debug(f"Apply conversions to {self.lang} `{self._url_title}` title") LOG.debug(f"Apply conversions to {self.lang} `{self._url_title}` title")
self._storage_title = self.convert_field(self._storage_title) self._storage_title = self.convert_field(self._storage_title)
self._url_title = self.convert_field(self._url_title) self._url_title = self.convert_field(self._url_title)
for p in CFG.ignore_pattern: for p in CFG.ignore_patterns:
for title in (self._storage_title, self._url_title): for title in (self._storage_title, self._url_title):
m = match(p, title, I) m = match(p, title, I)
if m is not None: if m is not None:
@ -603,6 +612,8 @@ class SpipRedactional(SpipWritable):
if len(self._text) > 0: if len(self._text) > 0:
# Remove remaining HTML after & append to body # Remove remaining HTML after & append to body
body += "\n\n" + self._text body += "\n\n" + self._text
elif not CFG.export_empty:
raise DontExportEmptyError
# Same with an "extra" section # Same with an "extra" section
if len(self._extra) > 0: if len(self._extra) > 0:
body += "\n\n# EXTRA\n\n" + self._extra body += "\n\n# EXTRA\n\n" + self._extra
@ -634,6 +645,7 @@ class SpipRedactional(SpipWritable):
except ( except (
LangNotFoundError, LangNotFoundError,
DontExportDraftError, DontExportDraftError,
DontExportEmptyError,
IgnoredPatternError, IgnoredPatternError,
) as err: ) as err:
LOG.debug(err) LOG.debug(err)