feat: settings to ignore empty objects or objects whose titles match a regex
This commit is contained in:
parent
9aa81efa60
commit
e1cd926078
16
README.md
16
README.md
@ -71,30 +71,38 @@ program with a `spip2md.yml` file in your working directory.
|
|||||||
Here are the *default configuration options*, with comments explaining their meaning:
|
Here are the *default configuration options*, with comments explaining their meaning:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
|
# Data source settings
|
||||||
db: spip # Name of the database
|
db: spip # Name of the database
|
||||||
db_host: localhost # Host of the database
|
db_host: localhost # Host of the database
|
||||||
db_user: spip # The database user
|
db_user: spip # The database user
|
||||||
db_pass: password # The database password
|
db_pass: password # The database password
|
||||||
data_dir: data # The directory in which SPIP images & files are stored
|
data_dir: data # The directory in which SPIP images & files are stored
|
||||||
|
|
||||||
|
# Data destination settings
|
||||||
export_languages: ["en"] # Array of languages to export, two letter lang code
|
export_languages: ["en"] # Array of languages to export, two letter lang code
|
||||||
# If set, directories will be created only for this language, according to this
|
# If set, directories will be created only for this language, according to this
|
||||||
# language’s titles. Other languages will be written along with correct url: attribute
|
# language’s titles. Other languages will be written along with correct url: attribute
|
||||||
storage_language: null
|
storage_language: null
|
||||||
output_dir: output/ # The directory in which files will be written
|
output_dir: output/ # The directory in which files will be written
|
||||||
|
|
||||||
|
# Destination directories names settings
|
||||||
prepend_h1: false # Add title of articles as Markdown h1, looks better on certain themes
|
prepend_h1: false # Add title of articles as Markdown h1, looks better on certain themes
|
||||||
# Prepend ID to directory slug, preventing collisions
|
# Prepend ID to directory slug, preventing collisions
|
||||||
# If false, a counter will be appended in case of name collision
|
# If false, a counter will be appended in case of name collision
|
||||||
prepend_id: false
|
prepend_id: false
|
||||||
prepend_lang: false # Prepend lang of the object to directory slug (prevents collisions)
|
prepend_lang: false # Prepend lang of the object to directory slug (prevents collisions)
|
||||||
export_drafts: true # Should we export drafts
|
|
||||||
remove_html: true # Should we clean remaining HTML blocks
|
|
||||||
title_max_length: 40 # Maximum length of a single filename
|
title_max_length: 40 # Maximum length of a single filename
|
||||||
|
|
||||||
|
# Ignored data settings
|
||||||
|
export_drafts: true # Should we export drafts
|
||||||
|
export_empty: true # Should we export empty articles
|
||||||
|
ignore_patterns: [] # List of regexes: sections or articles whose title matches one of them will be ignored
|
||||||
|
|
||||||
|
# Text body processing settings
|
||||||
|
remove_html: true # Should we clean remaining HTML blocks
|
||||||
unknown_char_replacement: ?? # String to replace broken encoding that cannot be repaired
|
unknown_char_replacement: ?? # String to replace broken encoding that cannot be repaired
|
||||||
|
|
||||||
# You probably don’t want to modify the settings below
|
# Settings you probably don’t want to modify
|
||||||
|
|
||||||
clear_log: true # Clear logfile between runs instead of appending to it
|
clear_log: true # Clear logfile between runs instead of appending to it
|
||||||
clear_output: true # Clear output dir between runs instead of merging into it
|
clear_output: true # Clear output dir between runs instead of merging into it
|
||||||
|
|
||||||
|
@ -69,12 +69,13 @@ class Configuration:
|
|||||||
prepend_id: bool = False # Add the ID of object before slug
|
prepend_id: bool = False # Add the ID of object before slug
|
||||||
prepend_lang: bool = False # Add the lang of object before slug
|
prepend_lang: bool = False # Add the lang of object before slug
|
||||||
export_drafts: bool = True # Should we export drafts as draft:true articles
|
export_drafts: bool = True # Should we export drafts as draft:true articles
|
||||||
|
export_empty: bool = True # Should we export empty articles
|
||||||
remove_html: bool = True # Should spip2md remove every HTML tags
|
remove_html: bool = True # Should spip2md remove every HTML tags
|
||||||
title_max_length: int = 40 # Maximum length of a single title for directory names
|
title_max_length: int = 40 # Maximum length of a single title for directory names
|
||||||
unknown_char_replacement: str = "??" # Replaces unknown characters
|
unknown_char_replacement: str = "??" # Replaces unknown characters
|
||||||
clear_log: bool = True # Clear log before every run instead of appending to
|
clear_log: bool = True # Clear log before every run instead of appending to
|
||||||
clear_output: bool = True # Remove eventual output dir before running
|
clear_output: bool = True # Remove eventual output dir before running
|
||||||
ignore_pattern: list[str] = [] # Ignore objects of which title match
|
ignore_patterns: list[str] = [] # Ignore objects of which title match
|
||||||
logfile: str = "log-spip2md.log" # File where logs will be written, relative to wd
|
logfile: str = "log-spip2md.log" # File where logs will be written, relative to wd
|
||||||
loglevel: str = "WARNING" # Minimum criticity of logs written in logfile
|
loglevel: str = "WARNING" # Minimum criticity of logs written in logfile
|
||||||
export_filetype: str = "md" # Extension of exported text files
|
export_filetype: str = "md" # Extension of exported text files
|
||||||
|
@ -217,7 +217,11 @@ class SpipWritable:
|
|||||||
if type(message) is FileNotFoundError:
|
if type(message) is FileNotFoundError:
|
||||||
output += "ERROR: NOT FOUND: "
|
output += "ERROR: NOT FOUND: "
|
||||||
elif type(message) is DoesNotExist:
|
elif type(message) is DoesNotExist:
|
||||||
output += "ERROR: NO DESTINATION DIR "
|
output += "ERROR: NO DESTINATION DIR: "
|
||||||
|
elif type(message) is DontExportDraftError:
|
||||||
|
output += "ERROR: NOT EXPORTING DRAFT: "
|
||||||
|
elif type(message) is DontExportEmptyError:
|
||||||
|
output += "ERROR: NOT EXPORTING EMPTY: "
|
||||||
elif type(message) is not str:
|
elif type(message) is not str:
|
||||||
output += "ERROR: UNKNOWN: "
|
output += "ERROR: UNKNOWN: "
|
||||||
# Print the output as the program goes
|
# Print the output as the program goes
|
||||||
@ -244,6 +248,7 @@ class SpipWritable:
|
|||||||
except (
|
except (
|
||||||
LangNotFoundError,
|
LangNotFoundError,
|
||||||
DontExportDraftError,
|
DontExportDraftError,
|
||||||
|
DontExportEmptyError,
|
||||||
IgnoredPatternError,
|
IgnoredPatternError,
|
||||||
FileNotFoundError,
|
FileNotFoundError,
|
||||||
) as err:
|
) as err:
|
||||||
@ -322,6 +327,10 @@ class DontExportDraftError(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class DontExportEmptyError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class SpipRedactional(SpipWritable):
|
class SpipRedactional(SpipWritable):
|
||||||
id_trad: BigIntegerField | BigAutoField | int
|
id_trad: BigIntegerField | BigAutoField | int
|
||||||
id_rubrique: BigAutoField | int
|
id_rubrique: BigAutoField | int
|
||||||
@ -502,7 +511,7 @@ class SpipRedactional(SpipWritable):
|
|||||||
LOG.debug(f"Apply conversions to {self.lang} `{self._url_title}` title")
|
LOG.debug(f"Apply conversions to {self.lang} `{self._url_title}` title")
|
||||||
self._storage_title = self.convert_field(self._storage_title)
|
self._storage_title = self.convert_field(self._storage_title)
|
||||||
self._url_title = self.convert_field(self._url_title)
|
self._url_title = self.convert_field(self._url_title)
|
||||||
for p in CFG.ignore_pattern:
|
for p in CFG.ignore_patterns:
|
||||||
for title in (self._storage_title, self._url_title):
|
for title in (self._storage_title, self._url_title):
|
||||||
m = match(p, title, I)
|
m = match(p, title, I)
|
||||||
if m is not None:
|
if m is not None:
|
||||||
@ -603,6 +612,8 @@ class SpipRedactional(SpipWritable):
|
|||||||
if len(self._text) > 0:
|
if len(self._text) > 0:
|
||||||
# Remove remaining HTML after & append to body
|
# Remove remaining HTML after & append to body
|
||||||
body += "\n\n" + self._text
|
body += "\n\n" + self._text
|
||||||
|
elif not CFG.export_empty:
|
||||||
|
raise DontExportEmptyError
|
||||||
# Same with an "extra" section
|
# Same with an "extra" section
|
||||||
if len(self._extra) > 0:
|
if len(self._extra) > 0:
|
||||||
body += "\n\n# EXTRA\n\n" + self._extra
|
body += "\n\n# EXTRA\n\n" + self._extra
|
||||||
@ -634,6 +645,7 @@ class SpipRedactional(SpipWritable):
|
|||||||
except (
|
except (
|
||||||
LangNotFoundError,
|
LangNotFoundError,
|
||||||
DontExportDraftError,
|
DontExportDraftError,
|
||||||
|
DontExportEmptyError,
|
||||||
IgnoredPatternError,
|
IgnoredPatternError,
|
||||||
) as err:
|
) as err:
|
||||||
LOG.debug(err)
|
LOG.debug(err)
|
||||||
|
Loading…
Reference in New Issue
Block a user