Start collecting unknown-encoding errors (returned alongside the converted text instead of being printed)
This commit is contained in:
parent
b8f99fb329
commit
bc616cc7a1
@ -257,23 +257,25 @@ unknownIso = (compile(r"\w*
.*\r?\n"),) # unknown 
 + surroundings
|
|||||||
|
|
||||||
def convertBody(spipBody):
|
def convertBody(spipBody):
|
||||||
text = spipBody
|
text = spipBody
|
||||||
|
errors = []
|
||||||
for spip, markdown in spipToMarkdown:
|
for spip, markdown in spipToMarkdown:
|
||||||
text = spip.sub(markdown, text)
|
text = spip.sub(markdown, text)
|
||||||
for iso, utf in isoToUtf:
|
for iso, utf in isoToUtf:
|
||||||
text = iso.sub(utf, text)
|
text = iso.sub(utf, text)
|
||||||
for iso in unknownIso:
|
for iso in unknownIso:
|
||||||
for match in iso.finditer(text):
|
for match in iso.finditer(text):
|
||||||
print(f" UNKNOWN CHARACTER {match.group()}")
|
errors.append(match.group())
|
||||||
return text
|
return text, errors
|
||||||
|
|
||||||
|
|
||||||
def convertMeta(spipMeta):
|
def convertMeta(spipMeta):
|
||||||
text = spipMeta
|
text = spipMeta
|
||||||
|
errors = []
|
||||||
for spip, metadata in spipToText:
|
for spip, metadata in spipToText:
|
||||||
text = spip.sub(metadata, text)
|
text = spip.sub(metadata, text)
|
||||||
for iso, utf in isoToUtf:
|
for iso, utf in isoToUtf:
|
||||||
text = iso.sub(utf, text)
|
text = iso.sub(utf, text)
|
||||||
for iso in unknownIso:
|
for iso in unknownIso:
|
||||||
for match in iso.finditer(text):
|
for match in iso.finditer(text):
|
||||||
print(f" UNKNOWN CHARACTER {match.group()}")
|
errors.append(match.group())
|
||||||
return text
|
return text, errors
|
||||||
|
@ -9,12 +9,12 @@ class Article:
|
|||||||
def __init__(self, article):
|
def __init__(self, article):
|
||||||
self.id = article.id_article
|
self.id = article.id_article
|
||||||
# self.surtitle = article.surtitre # Probably unused
|
# self.surtitle = article.surtitre # Probably unused
|
||||||
self.title = convertMeta(article.titre)
|
self.title, self.title_unknown = convertMeta(article.titre)
|
||||||
self.subtitle = article.soustitre # Probably unused
|
self.subtitle = article.soustitre # Probably unused
|
||||||
# self.section = article.id_rubrique # TODO join
|
# self.section = article.id_rubrique # TODO join
|
||||||
self.description = convertMeta(article.descriptif)
|
self.description, self.description_unknown = convertMeta(article.descriptif)
|
||||||
self.caption = article.chapo # Probably unused
|
self.caption = article.chapo # Probably unused
|
||||||
self.text = convertBody(article.texte) # Markdown
|
self.text, self.text_unknown = convertBody(article.texte) # Markdown
|
||||||
self.ps = article.ps # Probably unused
|
self.ps = article.ps # Probably unused
|
||||||
self.publicationDate = article.date
|
self.publicationDate = article.date
|
||||||
self.draft = False if article.statut == "publie" else True
|
self.draft = False if article.statut == "publie" else True
|
||||||
|
Loading…
Reference in New Issue
Block a user