better article text build

This commit is contained in:
Guilhem Fauré 2023-05-16 10:01:33 +02:00
parent bc616cc7a1
commit 12db0375e7
2 changed files with 40 additions and 45 deletions

View File

@ -252,7 +252,10 @@ isoToUtf = (
) )
## WARNING unknown broken encoding ## WARNING unknown broken encoding
unknownIso = (compile(r"\w*
.*\r?\n"),) # unknown 
 + surroundings unknownIso = (
compile(r"\w*
.*\r?\n"), # unknown 
 + surroundings
compile(r"\w*∆.*\r?\n"), # unknown â^† + surroundings
)
def convertBody(spipBody): def convertBody(spipBody):

View File

@ -1,3 +1,5 @@
from array import array
from converter import convertBody, convertMeta from converter import convertBody, convertMeta
from database import * from database import *
from slugify import slugify from slugify import slugify
@ -46,8 +48,7 @@ class Article:
return SpipAuteursLiens.select().where(SpipAuteursLiens.id_objet == self.id) return SpipAuteursLiens.select().where(SpipAuteursLiens.id_objet == self.id)
def get_frontmatter(self): def get_frontmatter(self):
return "---\n{}---".format( return dump(
dump(
{ {
"lang": self.lang, "lang": self.lang,
"title": self.title, "title": self.title,
@ -61,40 +62,31 @@ class Article:
}, },
allow_unicode=True, allow_unicode=True,
) )
)
# Contains things before the article like caption & titles
def get_starting(self):
    """Return the text placed before the article body.

    Only the caption is emitted: when present it is followed by a blank
    line and a ``***`` horizontal rule. A caption that is ``None``, empty
    or made only of whitespace yields an empty string, so no stray rule
    is written for articles without a real caption.
    """
    caption = self.caption
    # Guard against a missing caption as well as blank ones (the source data
    # sometimes stores a single space instead of an empty string).
    if caption is None or not caption.strip():
        return ""
    return f"{caption}\n\n***\n"
# Contains things after the article like ps & extra
def get_ending(self):
    """Return the sections appended after the article body.

    Each of ``extra``, ``ps`` and ``microblog`` that is non-empty becomes
    its own section, in that order, introduced by a level-1 heading
    (``# EXTRA``, ``# POST-SCRIPTUM``, ``# MICROBLOGGING``). Sections are
    separated by a blank line. Returns an empty string when none of the
    three fields has content.
    """
    # The previous chained conditional expression nested as
    # `a if c1 else (b if c2 else (c if c3 else ""))`, so only the first
    # non-empty section was ever returned. Collect every section instead.
    sections = (
        ("EXTRA", self.extra),
        ("POST-SCRIPTUM", self.ps),
        ("MICROBLOGGING", self.microblog),
    )
    # Truthiness skips both None and "" without calling len() on None.
    return "\n\n".join(
        f"# {heading}\n\n{body}" for heading, body in sections if body
    )
def get_article(self):
    """Build the final article text.

    The result starts with the front matter returned by
    ``self.get_frontmatter()`` wrapped between ``---`` fences, followed by
    each optional part that has content, every part separated from the
    previous one by a blank line:

    * the caption, followed by a ``***`` horizontal rule
    * the article text
    * an ``# EXTRA`` section
    * a ``# POST-SCRIPTUM`` section
    * a ``# MICROBLOGGING`` section

    A part whose field is ``None``, empty or whitespace-only is skipped.
    """

    def has_content(value):
        # None-safe, and treats blank strings (e.g. a lone space) as absent.
        return value is not None and len(value.strip()) > 0

    parts = ["---\n" + self.get_frontmatter() + "---"]
    # If there is a caption, add the caption followed by a hr
    if has_content(self.caption):
        parts.append(self.caption + "\n\n***")
    if has_content(self.text):
        parts.append(self.text)
    # Trailing titled sections, in output order
    titled = (
        ("EXTRA", self.extra),
        ("POST-SCRIPTUM", self.ps),
        ("MICROBLOGGING", self.microblog),
    )
    for heading, body in titled:
        if has_content(body):
            parts.append("# " + heading + "\n\n" + body)
    # A blank line between consecutive parts
    return "\n\n".join(parts)
class Articles: class Articles:
exported: int = 0 exported: int = 0
unknownChars: list = []
def __init__(self, maxToExport) -> None: def __init__(self, maxToExport) -> None:
# Query the DB to retrieve all articles sorted by publication date # Query the DB to retrieve all articles sorted by publication date
@ -111,7 +103,7 @@ class Articles:
def __next__(self): def __next__(self):
if self.remaining() <= 0: if self.remaining() <= 0:
raise StopIteration raise StopIteration()
self.exported += 1 self.exported += 1
return ( return (
{"exported": self.exported, "remaining": self.remaining()}, {"exported": self.exported, "remaining": self.remaining()},