Better conversion of unknown characters
This commit is contained in:
parent
477037573a
commit
1dc7d72987
@ -1,5 +1,6 @@
|
|||||||
# pyright: strict
|
# pyright: strict
|
||||||
from re import I, S, compile, finditer
|
from re import I, S, compile, finditer
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
# SPIP syntax to Markdown
|
# SPIP syntax to Markdown
|
||||||
spip_to_markdown = (
|
spip_to_markdown = (
|
||||||
@ -274,7 +275,9 @@ unknown_iso = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def convert_body(text: str) -> str:
|
def convert_body(text: Optional[str]) -> str:
|
||||||
|
if text is None:
|
||||||
|
return ""
|
||||||
for spip, markdown in spip_to_markdown:
|
for spip, markdown in spip_to_markdown:
|
||||||
text = spip.sub(markdown, text)
|
text = spip.sub(markdown, text)
|
||||||
for iso, utf in iso_to_utf:
|
for iso, utf in iso_to_utf:
|
||||||
@ -282,7 +285,9 @@ def convert_body(text: str) -> str:
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def convert_meta(text: str) -> str:
|
def convert_meta(text: Optional[str]) -> str:
|
||||||
|
if text is None:
|
||||||
|
return ""
|
||||||
for spip, metadata in spip_to_text:
|
for spip, metadata in spip_to_text:
|
||||||
text = spip.sub(metadata, text)
|
text = spip.sub(metadata, text)
|
||||||
for iso, utf in iso_to_utf:
|
for iso, utf in iso_to_utf:
|
||||||
|
@ -27,7 +27,7 @@ class Item:
|
|||||||
self.lang: str = item.lang
|
self.lang: str = item.lang
|
||||||
self.set_lang: bool = item.langue_choisie # TODO Why ?
|
self.set_lang: bool = item.langue_choisie # TODO Why ?
|
||||||
self.translation_key: int = item.id_trad
|
self.translation_key: int = item.id_trad
|
||||||
self.extra: str = item.extra # Probably unused
|
self.extra: str = convert_body(item.extra) # Probably unused
|
||||||
|
|
||||||
def get_slug(self, date: bool = False) -> str:
|
def get_slug(self, date: bool = False) -> str:
|
||||||
return slugify(f"{self.publication if date else ''}-{self.title}")
|
return slugify(f"{self.publication if date else ''}-{self.title}")
|
||||||
@ -64,7 +64,7 @@ class Item:
|
|||||||
if len(self.text) > 0:
|
if len(self.text) > 0:
|
||||||
body += "\n\n" + self.text
|
body += "\n\n" + self.text
|
||||||
# Same with an "extra" section
|
# Same with an "extra" section
|
||||||
if self.extra is not None and len(self.extra) > 0:
|
if len(self.extra) > 0:
|
||||||
body += "\n\n# EXTRA\n\n" + self.extra
|
body += "\n\n# EXTRA\n\n" + self.extra
|
||||||
return body
|
return body
|
||||||
|
|
||||||
@ -77,10 +77,10 @@ class Article(Item):
|
|||||||
def __init__(self, article) -> None:
|
def __init__(self, article) -> None:
|
||||||
super().__init__(article)
|
super().__init__(article)
|
||||||
self.id: int = article.id_article
|
self.id: int = article.id_article
|
||||||
self.surtitle: str = article.surtitre # Probably unused
|
self.surtitle: str = convert_meta(article.surtitre) # Probably unused
|
||||||
self.subtitle: str = article.soustitre # Probably unused
|
self.subtitle: str = convert_meta(article.soustitre) # Probably unused
|
||||||
self.caption: str = article.chapo # Probably unused
|
self.caption: str = convert_body(article.chapo) # Probably unused
|
||||||
self.ps: str = article.ps # Probably unused
|
self.ps: str = convert_body(article.ps) # Probably unused
|
||||||
self.update_2: str = article.date_modif # Probably unused duplicate of maj
|
self.update_2: str = article.date_modif # Probably unused duplicate of maj
|
||||||
self.creation: str = article.date_redac
|
self.creation: str = article.date_redac
|
||||||
self.forum: bool = article.accepter_forum # TODO Why ?
|
self.forum: bool = article.accepter_forum # TODO Why ?
|
||||||
|
@ -111,6 +111,10 @@ if __name__ == "__main__": # Following is executed only if script is directly e
|
|||||||
)
|
)
|
||||||
# Print the context in which the unknown characters are found
|
# Print the context in which the unknown characters are found
|
||||||
for text in unknown_chars_apparitions:
|
for text in unknown_chars_apparitions:
|
||||||
print(f" {BOLD}…{RESET} " + highlight_unknown_chars(text, R, RESET) + " …")
|
print(
|
||||||
|
f" {BOLD}…{RESET} "
|
||||||
|
+ highlight_unknown_chars(text, R, RESET)
|
||||||
|
+ f" {BOLD}…{RESET}"
|
||||||
|
)
|
||||||
|
|
||||||
db.close() # Close the connection with the database
|
db.close() # Close the connection with the database
|
||||||
|
Loading…
Reference in New Issue
Block a user