better unknown chars conversion
This commit is contained in:
parent
477037573a
commit
1dc7d72987
@ -1,5 +1,6 @@
|
||||
# pyright: strict
|
||||
from re import I, S, compile, finditer
|
||||
from typing import Optional
|
||||
|
||||
# SPIP syntax to Markdown
|
||||
spip_to_markdown = (
|
||||
@ -274,7 +275,9 @@ unknown_iso = (
|
||||
)
|
||||
|
||||
|
||||
def convert_body(text: str) -> str:
|
||||
def convert_body(text: Optional[str]) -> str:
|
||||
if text is None:
|
||||
return ""
|
||||
for spip, markdown in spip_to_markdown:
|
||||
text = spip.sub(markdown, text)
|
||||
for iso, utf in iso_to_utf:
|
||||
@ -282,7 +285,9 @@ def convert_body(text: str) -> str:
|
||||
return text
|
||||
|
||||
|
||||
def convert_meta(text: str) -> str:
|
||||
def convert_meta(text: Optional[str]) -> str:
|
||||
if text is None:
|
||||
return ""
|
||||
for spip, metadata in spip_to_text:
|
||||
text = spip.sub(metadata, text)
|
||||
for iso, utf in iso_to_utf:
|
||||
|
@ -27,7 +27,7 @@ class Item:
|
||||
self.lang: str = item.lang
|
||||
self.set_lang: bool = item.langue_choisie # TODO Why ?
|
||||
self.translation_key: int = item.id_trad
|
||||
self.extra: str = item.extra # Probably unused
|
||||
self.extra: str = convert_body(item.extra) # Probably unused
|
||||
|
||||
def get_slug(self, date: bool = False) -> str:
    """Return the slugified "<publication>-<title>" string for this item.

    The publication part is left empty unless ``date`` is true; the
    separating hyphen is always passed to ``slugify``.
    """
    prefix = self.publication if date else ""
    return slugify(f"{prefix}-{self.title}")
|
||||
@ -64,7 +64,7 @@ class Item:
|
||||
if len(self.text) > 0:
|
||||
body += "\n\n" + self.text
|
||||
# Same with an "extra" section
|
||||
if self.extra is not None and len(self.extra) > 0:
|
||||
if len(self.extra) > 0:
|
||||
body += "\n\n# EXTRA\n\n" + self.extra
|
||||
return body
|
||||
|
||||
@ -77,10 +77,10 @@ class Article(Item):
|
||||
def __init__(self, article) -> None:
|
||||
super().__init__(article)
|
||||
self.id: int = article.id_article
|
||||
self.surtitle: str = article.surtitre # Probably unused
|
||||
self.subtitle: str = article.soustitre # Probably unused
|
||||
self.caption: str = article.chapo # Probably unused
|
||||
self.ps: str = article.ps # Probably unused
|
||||
self.surtitle: str = convert_meta(article.surtitre) # Probably unused
|
||||
self.subtitle: str = convert_meta(article.soustitre) # Probably unused
|
||||
self.caption: str = convert_body(article.chapo) # Probably unused
|
||||
self.ps: str = convert_body(article.ps) # Probably unused
|
||||
self.update_2: str = article.date_modif # Probably unused duplicate of maj
|
||||
self.creation: str = article.date_redac
|
||||
self.forum: bool = article.accepter_forum # TODO Why ?
|
||||
|
@ -111,6 +111,10 @@ if __name__ == "__main__": # Following is executed only if script is directly e
|
||||
)
|
||||
# Print the context in which the unknown characters are found
|
||||
for text in unknown_chars_apparitions:
|
||||
print(f" {BOLD}…{RESET} " + highlight_unknown_chars(text, R, RESET) + " …")
|
||||
print(
|
||||
f" {BOLD}…{RESET} "
|
||||
+ highlight_unknown_chars(text, R, RESET)
|
||||
+ f" {BOLD}…{RESET}"
|
||||
)
|
||||
|
||||
db.close() # Close the connection with the database
|
||||
|
Loading…
Reference in New Issue
Block a user