feat: settings to ignore empty objects or objects whose titles match a regex
This commit is contained in:
parent
9aa81efa60
commit
e1cd926078
16
README.md
16
README.md
@ -71,30 +71,38 @@ program with a `spip2md.yml` file in your working directory.
|
||||
Here’s the *default configuration options* with comments explaining their meaning :
|
||||
|
||||
```yaml
|
||||
# Data source settings
|
||||
db: spip # Name of the database
|
||||
db_host: localhost # Host of the database
|
||||
db_user: spip # The database user
|
||||
db_pass: password # The database password
|
||||
data_dir: data # The directory in which SPIP images & files are stored
|
||||
|
||||
# Data destination settings
|
||||
export_languages: ["en"] # Array of languages to export, two letter lang code
|
||||
# If set, directories will be created only for this language, according to this
|
||||
# language’s titles. Other languages will be written along with correct url: attribute
|
||||
storage_language: null
|
||||
output_dir: output/ # The directory in which files will be written
|
||||
|
||||
# Destination directories names settings
|
||||
prepend_h1: false # Add title of articles as Markdown h1, looks better on certain themes
|
||||
# Prepend ID to directory slug, preventing collisions
|
||||
# If false, a counter will be appended in case of name collision
|
||||
prepend_id: false
|
||||
prepend_lang: false # Prepend lang of the object to directory slug (prevents collision)
|
||||
export_drafts: true # Should we export drafts
|
||||
remove_html: true # Should we clean remaining HTML blocks
|
||||
title_max_length: 40 # Maximum length of a single filename
|
||||
|
||||
# Ignored data settings
|
||||
export_drafts: true # Should we export drafts
|
||||
export_empty: true # Should we export empty articles
|
||||
ignore_patterns: [] # List of regexes: sections or articles whose title matches one will be ignored
|
||||
|
||||
# Text body processing settings
|
||||
remove_html: true # Should we clean remaining HTML blocks
|
||||
unknown_char_replacement: ?? # String to replace broken encoding that cannot be repaired
|
||||
|
||||
# You probably don’t want to modify the settings below
|
||||
|
||||
# Settings you probably don’t want to modify
|
||||
clear_log: true # Clear logfile between runs instead of appending to it
|
||||
clear_output: true # Clear output dir between runs instead of merging into it
|
||||
|
||||
|
@ -69,12 +69,13 @@ class Configuration:
|
||||
prepend_id: bool = False # Add the ID of object before slug
|
||||
prepend_lang: bool = False # Add the lang of object before slug
|
||||
export_drafts: bool = True # Should we export drafts as draft:true articles
|
||||
export_empty: bool = True # Should we export empty articles
|
||||
remove_html: bool = True # Should spip2md remove every HTML tags
|
||||
title_max_length: int = 40 # Maximum length of a single title for directory names
|
||||
unknown_char_replacement: str = "??" # Replaces unknown characters
|
||||
clear_log: bool = True # Clear log before every run instead of appending to
|
||||
clear_output: bool = True # Remove eventual output dir before running
|
||||
ignore_pattern: list[str] = [] # Ignore objects of which title match
|
||||
ignore_patterns: list[str] = [] # Ignore objects of which title match
|
||||
logfile: str = "log-spip2md.log" # File where logs will be written, relative to wd
|
||||
loglevel: str = "WARNING" # Minimum criticity of logs written in logfile
|
||||
export_filetype: str = "md" # Extension of exported text files
|
||||
|
@ -217,7 +217,11 @@ class SpipWritable:
|
||||
if type(message) is FileNotFoundError:
|
||||
output += "ERROR: NOT FOUND: "
|
||||
elif type(message) is DoesNotExist:
|
||||
output += "ERROR: NO DESTINATION DIR "
|
||||
output += "ERROR: NO DESTINATION DIR: "
|
||||
elif type(message) is DontExportDraftError:
|
||||
output += "ERROR: NOT EXPORTING DRAFT: "
|
||||
elif type(message) is DontExportEmptyError:
|
||||
output += "ERROR: NOT EXPORTING EMPTY: "
|
||||
elif type(message) is not str:
|
||||
output += "ERROR: UNKNOWN: "
|
||||
# Print the output as the program goes
|
||||
@ -244,6 +248,7 @@ class SpipWritable:
|
||||
except (
|
||||
LangNotFoundError,
|
||||
DontExportDraftError,
|
||||
DontExportEmptyError,
|
||||
IgnoredPatternError,
|
||||
FileNotFoundError,
|
||||
) as err:
|
||||
@ -322,6 +327,10 @@ class DontExportDraftError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class DontExportEmptyError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class SpipRedactional(SpipWritable):
|
||||
id_trad: BigIntegerField | BigAutoField | int
|
||||
id_rubrique: BigAutoField | int
|
||||
@ -502,7 +511,7 @@ class SpipRedactional(SpipWritable):
|
||||
LOG.debug(f"Apply conversions to {self.lang} `{self._url_title}` title")
|
||||
self._storage_title = self.convert_field(self._storage_title)
|
||||
self._url_title = self.convert_field(self._url_title)
|
||||
for p in CFG.ignore_pattern:
|
||||
for p in CFG.ignore_patterns:
|
||||
for title in (self._storage_title, self._url_title):
|
||||
m = match(p, title, I)
|
||||
if m is not None:
|
||||
@ -603,6 +612,8 @@ class SpipRedactional(SpipWritable):
|
||||
if len(self._text) > 0:
|
||||
# Remove remaining HTML after & append to body
|
||||
body += "\n\n" + self._text
|
||||
elif not CFG.export_empty:
|
||||
raise DontExportEmptyError
|
||||
# Same with an "extra" section
|
||||
if len(self._extra) > 0:
|
||||
body += "\n\n# EXTRA\n\n" + self._extra
|
||||
@ -634,6 +645,7 @@ class SpipRedactional(SpipWritable):
|
||||
except (
|
||||
LangNotFoundError,
|
||||
DontExportDraftError,
|
||||
DontExportEmptyError,
|
||||
IgnoredPatternError,
|
||||
) as err:
|
||||
LOG.debug(err)
|
||||
|
Loading…
Reference in New Issue
Block a user