sections directories, better unknown char messages
This commit is contained in:
parent
caf9db541f
commit
f23073ef12
@ -13,7 +13,7 @@ class Article:
|
|||||||
# self.surtitle = article.surtitre # Probably unused
|
# self.surtitle = article.surtitre # Probably unused
|
||||||
self.title = convertMeta(article.titre)
|
self.title = convertMeta(article.titre)
|
||||||
self.subtitle = article.soustitre # Probably unused
|
self.subtitle = article.soustitre # Probably unused
|
||||||
# self.section = article.id_rubrique # TODO join
|
self.section_id = article.id_rubrique
|
||||||
self.description = convertMeta(article.descriptif)
|
self.description = convertMeta(article.descriptif)
|
||||||
self.caption = article.chapo # Probably unused
|
self.caption = article.chapo # Probably unused
|
||||||
self.text = convertBody(article.texte) # Markdown
|
self.text = convertBody(article.texte) # Markdown
|
||||||
@ -38,14 +38,30 @@ class Article:
|
|||||||
self.virtual = article.virtuel # TODO Why ?
|
self.virtual = article.virtuel # TODO Why ?
|
||||||
self.microblog = article.microblog # Probably unused
|
self.microblog = article.microblog # Probably unused
|
||||||
|
|
||||||
def getSlug(self):
|
def getSection(self):
|
||||||
return slugify(f"{self.id}-{self.title}")
|
return convertMeta(
|
||||||
|
SpipRubriques.select()
|
||||||
|
.where(SpipRubriques.id_rubrique == self.section_id)[0]
|
||||||
|
.titre
|
||||||
|
)
|
||||||
|
|
||||||
def getPath(self):
|
def getPath(self) -> str:
|
||||||
return self.getSlug()
|
return (
|
||||||
|
slugify(self.getSection()) + "/" + slugify(f"{self.id}-{self.title}") + "/"
|
||||||
|
)
|
||||||
|
|
||||||
|
def getFilename(self) -> str:
|
||||||
|
return "index.fr.md"
|
||||||
|
|
||||||
def getAuthors(self):
|
def getAuthors(self):
|
||||||
return SpipAuteursLiens.select().where(SpipAuteursLiens.id_objet == self.id)
|
return (
|
||||||
|
SpipAuteurs.select()
|
||||||
|
.join(
|
||||||
|
SpipAuteursLiens,
|
||||||
|
on=(SpipAuteurs.id_auteur == SpipAuteursLiens.id_auteur),
|
||||||
|
)
|
||||||
|
.where(SpipAuteursLiens.id_objet == self.id)
|
||||||
|
)
|
||||||
|
|
||||||
def getFrontmatter(self):
|
def getFrontmatter(self):
|
||||||
return dump(
|
return dump(
|
||||||
@ -58,7 +74,7 @@ class Article:
|
|||||||
"lastmod": self.update,
|
"lastmod": self.update,
|
||||||
"draft": self.draft,
|
"draft": self.draft,
|
||||||
"description": self.description,
|
"description": self.description,
|
||||||
"authors": [author.id_auteur for author in self.getAuthors()],
|
"authors": [author.nom for author in self.getAuthors()],
|
||||||
},
|
},
|
||||||
allow_unicode=True,
|
allow_unicode=True,
|
||||||
)
|
)
|
||||||
@ -83,7 +99,7 @@ class Article:
|
|||||||
article += "\n\n# MICROBLOGGING\n\n" + self.microblog
|
article += "\n\n# MICROBLOGGING\n\n" + self.microblog
|
||||||
return article
|
return article
|
||||||
|
|
||||||
def getUnknownChars(self):
|
def getUnknownChars(self) -> list:
|
||||||
errors: list = []
|
errors: list = []
|
||||||
for text in (self.title, self.text):
|
for text in (self.title, self.text):
|
||||||
for char in unknownIso:
|
for char in unknownIso:
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from re import I, S, compile, finditer
|
from re import I, S, compile, finditer
|
||||||
|
|
||||||
# SPIP syntax to Markdown
|
# SPIP syntax to Markdown
|
||||||
spipToMarkdown = (
|
spipToMarkdown: tuple = (
|
||||||
( # horizontal rule
|
( # horizontal rule
|
||||||
compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", S | I),
|
compile(r"- ?- ?- ?- ?[\- ]*|<hr ?.*?>", S | I),
|
||||||
# r"---",
|
# r"---",
|
||||||
@ -113,7 +113,7 @@ spipToMarkdown = (
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
spipToText = (
|
spipToText: tuple = (
|
||||||
( # strong
|
( # strong
|
||||||
compile(r"\{\{ *(.*?) *\}\}", S | I),
|
compile(r"\{\{ *(.*?) *\}\}", S | I),
|
||||||
r"\1",
|
r"\1",
|
||||||
@ -158,7 +158,7 @@ spipToText = (
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
isoToUtf = (
|
isoToUtf: tuple = (
|
||||||
# Broken encoding
|
# Broken encoding
|
||||||
( # Fix UTF-8 apostrophe that was interpreted as ISO 8859-1
|
( # Fix UTF-8 apostrophe that was interpreted as ISO 8859-1
|
||||||
"’",
|
"’",
|
||||||
@ -252,14 +252,13 @@ isoToUtf = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
## WARNING unknown broken encoding
|
## WARNING unknown broken encoding
|
||||||
unknownIso = (
|
unknownIso: tuple = (
|
||||||
r"
", # unknown 
 + surroundings
|
r"
", # unknown 

|
||||||
r"∆", # unknown â^† + surroundings
|
r"∆", # unknown â^†
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def convertBody(spipBody):
|
def convertBody(text: str) -> str:
|
||||||
text: str = spipBody
|
|
||||||
for spip, markdown in spipToMarkdown:
|
for spip, markdown in spipToMarkdown:
|
||||||
text = spip.sub(markdown, text)
|
text = spip.sub(markdown, text)
|
||||||
for iso, utf in isoToUtf:
|
for iso, utf in isoToUtf:
|
||||||
@ -267,18 +266,22 @@ def convertBody(spipBody):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def convertMeta(text: str) -> str:
    """Convert a SPIP metadata string (title, description, ...) to plain text.

    Every ``(compiled pattern, replacement)`` pair of ``spipToText`` is
    applied in order with ``pattern.sub``; then every ``(broken, fixed)``
    pair of ``isoToUtf`` is applied with a literal ``str.replace``.

    Args:
        text: Raw metadata string as stored by SPIP.

    Returns:
        The converted string.
    """
    for spip, metadata in spipToText:
        text = spip.sub(metadata, text)
    for iso, utf in isoToUtf:
        # str is immutable: str.replace returns a new string, so the result
        # must be rebound or the encoding fix is silently discarded.
        text = text.replace(iso, utf)
    return text
|
||||||
|
|
||||||
def highlightUnknownChars(text):
|
def removeUnknownChars(text: str) -> str:
    """Return *text* with every sequence listed in ``unknownIso`` removed.

    Each entry of ``unknownIso`` is matched literally (``str.replace``),
    not as a regular expression.

    Args:
        text: String that may contain unidentified broken-encoding sequences.

    Returns:
        A copy of *text* without those sequences.
    """
    for char in unknownIso:
        # str.replace returns a new string; rebind it, otherwise the input
        # is returned unchanged.
        text = text.replace(char, "")
    return text
|
||||||
|
|
||||||
|
def highlightUnknownChars(text: str) -> str:
|
||||||
# Define terminal escape sequences to stylize output, regex escaped
|
# Define terminal escape sequences to stylize output, regex escaped
|
||||||
COLOR = "\033[91m" + "\033[1m" # Red + Bold
|
COLOR: str = "\033[91m" + "\033[1m" # Red + Bold
|
||||||
RESET = "\033[0m"
|
RESET: str = "\033[0m"
|
||||||
# Highlight in COLOR unknown chars in text
|
# Highlight in COLOR unknown chars in text
|
||||||
for char in unknownIso:
|
for char in unknownIso:
|
||||||
for match in finditer(char, text):
|
for match in finditer(char, text):
|
||||||
|
@ -1,14 +1,14 @@
|
|||||||
#!python
|
#!python
|
||||||
|
from articles import Article, Articles
|
||||||
from config import config
|
from config import config
|
||||||
from database import db
|
|
||||||
from articles import Articles
|
|
||||||
from converter import highlightUnknownChars
|
from converter import highlightUnknownChars
|
||||||
|
from database import db
|
||||||
|
|
||||||
if __name__ != "__main__":
|
if __name__ != "__main__":
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
from os import mkdir
|
from os import makedirs, mkdir
|
||||||
from shutil import rmtree
|
from shutil import rmtree
|
||||||
|
|
||||||
# Clean the output dir & create a new
|
# Clean the output dir & create a new
|
||||||
@ -32,7 +32,8 @@ B: str = "\033[94m"
|
|||||||
BOLD: str = "\033[1m"
|
BOLD: str = "\033[1m"
|
||||||
RESET: str = "\033[0m"
|
RESET: str = "\033[0m"
|
||||||
|
|
||||||
unknownChars: dict = {}
|
# Articles that contain unknown chars
|
||||||
|
unknownCharsArticles: list[Article] = []
|
||||||
|
|
||||||
# Loop among first maxToExport articles & export them
|
# Loop among first maxToExport articles & export them
|
||||||
for counter, article in Articles(maxToExport):
|
for counter, article in Articles(maxToExport):
|
||||||
@ -44,24 +45,26 @@ for counter, article in Articles(maxToExport):
|
|||||||
print(
|
print(
|
||||||
f"{BOLD}{counter['exported']}.{RESET} " + highlightUnknownChars(article.title)
|
f"{BOLD}{counter['exported']}.{RESET} " + highlightUnknownChars(article.title)
|
||||||
)
|
)
|
||||||
fullPath = config.outputDir + "/" + article.getPath()
|
fullPath: str = config.outputDir + "/" + article.getPath()
|
||||||
print(f"{BOLD}>{RESET} {fullPath}/index.md")
|
print(f"{BOLD}>{RESET} {fullPath}{article.getFilename()}")
|
||||||
mkdir(fullPath)
|
makedirs(fullPath, exist_ok=True)
|
||||||
with open(fullPath + "/index.md", "w") as f:
|
with open(fullPath + article.getFilename(), "w") as f:
|
||||||
f.write(article.getArticle())
|
f.write(article.getArticle())
|
||||||
# Store detected unknown characters
|
# Store detected unknown characters
|
||||||
if len(article.getUnknownChars()) > 0:
|
if len(article.getUnknownChars()) > 0:
|
||||||
unknownChars[article.title] = article.getUnknownChars()
|
unknownCharsArticles.append(article)
|
||||||
|
|
||||||
for title in unknownChars:
|
for article in unknownCharsArticles:
|
||||||
nb = len(unknownChars[title])
|
unknownCharsApparitions: list = article.getUnknownChars()
|
||||||
|
nb: int = len(unknownCharsApparitions)
|
||||||
|
s: str = "s" if nb > 1 else ""
|
||||||
print(
|
print(
|
||||||
f"\n{BOLD}{nb} "
|
f"\n{BOLD}{nb}{RESET} unknown character{s} "
|
||||||
+ f"unknown character{'s' if nb > 1 else ''} detected in{RESET} " +
|
+ f"detected in article {BOLD}{article.id}{RESET}"
|
||||||
highlightUnknownChars(title)
|
+ f"\n{BOLD}·{RESET} "
|
||||||
|
+ highlightUnknownChars(article.title)
|
||||||
)
|
)
|
||||||
for text in unknownChars[title]:
|
for text in unknownCharsApparitions:
|
||||||
print(f" {BOLD}…{RESET} " + highlightUnknownChars(text))
|
print(f" {BOLD}…{RESET} " + highlightUnknownChars(text))
|
||||||
|
|
||||||
# Close the database connection
|
db.close() # Close the database connection
|
||||||
db.close()
|
|
||||||
|
Loading…
Reference in New Issue
Block a user