From 12db0375e7f065cacee85f0c9001074fb952f2f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilhem=20Faur=C3=A9?= Date: Tue, 16 May 2023 10:01:33 +0200 Subject: [PATCH] better article text build --- spip2md/converter.py | 5 ++- spip2md/iterator.py | 80 ++++++++++++++++++++------------------------ 2 files changed, 40 insertions(+), 45 deletions(-) diff --git a/spip2md/converter.py b/spip2md/converter.py index 30e1513..9da8723 100644 --- a/spip2md/converter.py +++ b/spip2md/converter.py @@ -252,7 +252,10 @@ isoToUtf = ( ) ## WARNING unknown broken encoding -unknownIso = (compile(r"\w*
.*\r?\n"),) # unknown 
 + surroundings +unknownIso = ( + compile(r"\w*
.*\r?\n"), # unknown 
 + surroundings + compile(r"\w*∆.*\r?\n"), # unknown â^† + surroundings +) def convertBody(spipBody): diff --git a/spip2md/iterator.py b/spip2md/iterator.py index a359a6d..3ac9990 100644 --- a/spip2md/iterator.py +++ b/spip2md/iterator.py @@ -1,3 +1,5 @@ +from array import array + from converter import convertBody, convertMeta from database import * from slugify import slugify @@ -38,7 +40,7 @@ class Article: def get_slug(self): return slugify(f"{self.id}-{self.title}") - + def get_path(self): return self.get_slug() @@ -46,55 +48,45 @@ class Article: return SpipAuteursLiens.select().where(SpipAuteursLiens.id_objet == self.id) def get_frontmatter(self): - return "---\n{}---".format( - dump( - { - "lang": self.lang, - "title": self.title, - # "subtitle": self.subtitle, - "date": self.creationDate, - "publishDate": self.publicationDate, - "lastmod": self.update, - "draft": self.draft, - "description": self.description, - "authors": [author.id_auteur for author in self.get_authors()], - }, - allow_unicode=True, - ) - ) - - # Contains things before the article like caption & titles - def get_starting(self): - return ( - # f"{self.caption}\n" if len(self.caption) > 0 else "" + f"# {self.title}\n" - f"{self.caption}\n\n***\n" - if len(self.caption) > 0 and self.caption != " " - else "" - ) - - # Contains things after the article like ps & extra - def get_ending(self): - return ( - f"# EXTRA\n\n{self.extra}" - if self.extra != None and len(self.extra) > 0 - else "" + f"# POST-SCRIPTUM\n\n{self.ps}" - if len(self.ps) > 0 - else "" + f"# MICROBLOGGING\n\n{self.microblog}" - if len(self.microblog) > 0 - else "" + return dump( + { + "lang": self.lang, + "title": self.title, + # "subtitle": self.subtitle, + "date": self.creationDate, + "publishDate": self.publicationDate, + "lastmod": self.update, + "draft": self.draft, + "description": self.description, + "authors": [author.id_auteur for author in self.get_authors()], + }, + allow_unicode=True, ) def get_article(self): - return "{}\n{}\n{}\n{}".format( - self.get_frontmatter(), - self.get_starting(), - self.text, - self.get_ending(), - ) + # Build the final article text + article: str = "---\n" + self.get_frontmatter() + "---" + # If there is a caption, add the caption followed by a hr + if len(self.caption) > 0: + article += "\n\n" + self.caption + "\n\n***" + # If there is a text, add the text preceded by two line breaks + if len(self.text) > 0: + article += "\n\n" + self.text + # Same with an "extra" section + if self.extra != None and len(self.extra) > 0: + article += "\n\n# EXTRA\n\n" + self.extra + # PS + if len(self.ps) > 0: + article += "\n\n# POST-SCRIPTUM\n\n" + self.ps + # Microblog + if len(self.microblog) > 0: + article += "\n\n# MICROBLOGGING\n\n" + self.microblog + return article class Articles: exported: int = 0 + unknownChars: list = [] def __init__(self, maxToExport) -> None: # Query the DB to retrieve all articles sorted by publication date @@ -111,7 +103,7 @@ class Articles: def __next__(self): if self.remaining() <= 0: - raise StopIteration + raise StopIteration() self.exported += 1 return ( {"exported": self.exported, "remaining": self.remaining()},