Start collecting unknown-encoding errors instead of printing them
This commit is contained in:
parent
b8f99fb329
commit
bc616cc7a1
@ -257,23 +257,25 @@ unknownIso = (compile(r"\w*
.*\r?\n"),) # unknown 
 + surroundings
|
||||
|
||||
def convertBody(spipBody):
    """Convert a SPIP body to Markdown and collect unknown-character matches.

    Every (pattern, replacement) pair of ``spipToMarkdown`` is applied to the
    text, then every pair of ``isoToUtf``. After that, each pattern of
    ``unknownIso`` is searched for in the result and the matched strings are
    gathered in a list instead of being printed.

    Returns:
        A ``(text, errors)`` tuple: the converted text and the list of strings
        matched by the ``unknownIso`` patterns (empty when nothing matched).
    """
    text = spipBody
    errors = []
    for spip, markdown in spipToMarkdown:
        text = spip.sub(markdown, text)
    for iso, utf in isoToUtf:
        text = iso.sub(utf, text)
    # Record every remaining match so the caller can report it.
    for iso in unknownIso:
        errors.extend(match.group() for match in iso.finditer(text))
    return text, errors
|
||||
|
||||
|
||||
def convertMeta(spipMeta):
    """Convert a SPIP metadata field to text and collect unknown-character matches.

    Every (pattern, replacement) pair of ``spipToText`` is applied to the
    text, then every pair of ``isoToUtf``. After that, each pattern of
    ``unknownIso`` is searched for in the result and the matched strings are
    gathered in a list instead of being printed.

    Returns:
        A ``(text, errors)`` tuple: the converted text and the list of strings
        matched by the ``unknownIso`` patterns (empty when nothing matched).
    """
    text = spipMeta
    errors = []
    for spip, metadata in spipToText:
        text = spip.sub(metadata, text)
    for iso, utf in isoToUtf:
        text = iso.sub(utf, text)
    # Record every remaining match so the caller can report it.
    for iso in unknownIso:
        errors.extend(match.group() for match in iso.finditer(text))
    return text, errors
|
||||
|
@ -9,12 +9,12 @@ class Article:
|
||||
def __init__(self, article):
|
||||
self.id = article.id_article
|
||||
# self.surtitle = article.surtitre # Probably unused
|
||||
self.title = convertMeta(article.titre)
|
||||
self.title, self.title_unknown = convertMeta(article.titre)
|
||||
self.subtitle = article.soustitre # Probably unused
|
||||
# self.section = article.id_rubrique # TODO join
|
||||
self.description = convertMeta(article.descriptif)
|
||||
self.description, self.description_unknown = convertMeta(article.descriptif)
|
||||
self.caption = article.chapo # Probably unused
|
||||
self.text = convertBody(article.texte) # Markdown
|
||||
self.text, self.text_unknown = convertBody(article.texte) # Markdown
|
||||
self.ps = article.ps # Probably unused
|
||||
self.publicationDate = article.date
|
||||
self.draft = False if article.statut == "publie" else True
|
||||
|
Loading…
Reference in New Issue
Block a user