From 1dc7d72987173e3fffe7df08f8f62c71ec17a2ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilhem=20Faur=C3=A9?= Date: Mon, 22 May 2023 11:32:05 +0200 Subject: [PATCH] better unknown chars conversion --- spip2md/converter.py | 9 +++++++-- spip2md/items.py | 12 ++++++------ spip2md/main.py | 6 +++++- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/spip2md/converter.py b/spip2md/converter.py index 0e56608..7f5182a 100644 --- a/spip2md/converter.py +++ b/spip2md/converter.py @@ -1,5 +1,6 @@ # pyright: strict from re import I, S, compile, finditer +from typing import Optional # SPIP syntax to Markdown spip_to_markdown = ( @@ -274,7 +275,9 @@ unknown_iso = ( ) -def convert_body(text: str) -> str: +def convert_body(text: Optional[str]) -> str: + if text is None: + return "" for spip, markdown in spip_to_markdown: text = spip.sub(markdown, text) for iso, utf in iso_to_utf: @@ -282,7 +285,9 @@ def convert_body(text: str) -> str: return text -def convert_meta(text: str) -> str: +def convert_meta(text: Optional[str]) -> str: + if text is None: + return "" for spip, metadata in spip_to_text: text = spip.sub(metadata, text) for iso, utf in iso_to_utf: diff --git a/spip2md/items.py b/spip2md/items.py index 47f0b6e..35da725 100644 --- a/spip2md/items.py +++ b/spip2md/items.py @@ -27,7 +27,7 @@ class Item: self.lang: str = item.lang self.set_lang: bool = item.langue_choisie # TODO Why ? self.translation_key: int = item.id_trad - self.extra: str = item.extra # Probably unused + self.extra: str = convert_body(item.extra) # Probably unused def get_slug(self, date: bool = False) -> str: return slugify(f"{self.publication if date else ''}-{self.title}") @@ -64,7 +64,7 @@ class Item: if len(self.text) > 0: body += "\n\n" + self.text # Same with an "extra" section - if self.extra is not None and len(self.extra) > 0: + if len(self.extra) > 0: body += "\n\n# EXTRA\n\n" + self.extra return body @@ -77,10 +77,10 @@ class Article(Item): def __init__(self, article) -> None: super().__init__(article) self.id: int = article.id_article - self.surtitle: str = article.surtitre # Probably unused - self.subtitle: str = article.soustitre # Probably unused - self.caption: str = article.chapo # Probably unused - self.ps: str = article.ps # Probably unused + self.surtitle: str = convert_meta(article.surtitre) # Probably unused + self.subtitle: str = convert_meta(article.soustitre) # Probably unused + self.caption: str = convert_body(article.chapo) # Probably unused + self.ps: str = convert_body(article.ps) # Probably unused self.update_2: str = article.date_modif # Probably unused duplicate of maj self.creation: str = article.date_redac self.forum: bool = article.accepter_forum # TODO Why ? diff --git a/spip2md/main.py b/spip2md/main.py index 1b907f4..cfb9e5a 100755 --- a/spip2md/main.py +++ b/spip2md/main.py @@ -111,6 +111,10 @@ if __name__ == "__main__": # Following is executed only if script is directly e ) # Print the context in which the unknown characters are found for text in unknown_chars_apparitions: - print(f" {BOLD}…{RESET} " + highlight_unknown_chars(text, R, RESET) + " …") + print( + f" {BOLD}…{RESET} " + + highlight_unknown_chars(text, R, RESET) + + f" {BOLD}…{RESET}" + ) db.close() # Close the connection with the database