Fix internal link replacement and language highlighting in terminal output

This commit is contained in:
Guilhem Fauré 2023-06-08 16:19:56 +02:00
parent cc3a2103d0
commit e279365b8d
2 changed files with 130 additions and 102 deletions

View File

@ -23,7 +23,10 @@ from spip2md.regexmaps import (
CONFIGLANGS, CONFIGLANGS,
DOCUMENT_LINK, DOCUMENT_LINK,
HTMLTAGS, HTMLTAGS,
IMAGE_LINK,
IMAGE_REPL,
ISO_UTF, ISO_UTF,
LINK_REPL,
MULTILANG_BLOCK, MULTILANG_BLOCK,
SECTION_LINK, SECTION_LINK,
SPECIAL_OUTPUT, SPECIAL_OUTPUT,
@ -111,6 +114,9 @@ class NormalizedDocument(SpipInterface, SpipDocuments):
self._id = self.id_document self._id = self.id_document
SpipLinkable = NormalizedSection | NormalizedArticle | NormalizedDocument
class WritableObject(SpipInterface): class WritableObject(SpipInterface):
# Apply a mapping from regex maps # Apply a mapping from regex maps
@staticmethod @staticmethod
@ -361,28 +367,75 @@ class RedactionalObject(WritableObject):
LOG.debug(f"{forced_lang} not found in `{self._title}`") LOG.debug(f"{forced_lang} not found in `{self._title}`")
return text return text
def replace_links( def replace_links(self, text: str) -> str:
self, class LinkMappings:
text: str, _link_types = IMAGE_LINK, DOCUMENT_LINK, SECTION_LINK, ARTICLE_LINK
mapping: tuple,
obj_type: type[NormalizedSection | NormalizedArticle | NormalizedDocument], def __iter__(self):
) -> str: self._type_cursor = 0
for id_link, path_link in mapping: self._link_cursor = -1
# print(f"Looking for links like {id_link}") return self
for m in id_link.finditer(text):
LOG.debug(f"Found document link {m.group()} in {self._title}") @staticmethod
def getdocument(obj_id: int) -> Document:
doc: Document = Document.get(Document.id_document == obj_id)
doc.convert()
return doc
@staticmethod
def getsection(obj_id: int) -> Section:
sec: Section = Section.get(Section.id_rubrique == obj_id)
sec.convert(self.lang)
return sec
@staticmethod
def getarticle(obj_id: int) -> Article:
art: Article = Article.get(Article.id_article == obj_id)
art.convert(self.lang)
return art
_obj_getters = getdocument, getdocument, getsection, getarticle
def __next__(self):
self._link_cursor += 1
# If we reach end of current link type, pass to the beginning of next
if self._link_cursor >= len(self._link_types[self._type_cursor]):
self._link_cursor = 0
self._type_cursor += 1
if self._type_cursor >= len(self._link_types):
raise StopIteration
return (
self._link_types[self._type_cursor][self._link_cursor],
self._obj_getters[self._type_cursor],
IMAGE_REPL if self._type_cursor == 0 else LINK_REPL,
)
for link, getobj, repl in LinkMappings():
# LOG.debug(f"Looking for {link} in {text}")
for m in link.finditer(text):
LOG.debug(f"Found internal link {m.group()} in {self._title}")
try: try:
o: obj_type = obj_type.get(obj_type._id == m.group(2)) LOG.debug(f"Searching for object of id {m.group(2)} with {getobj}")
# TODO get relative path o: SpipLinkable = getobj(int(m.group(2)))
# TODO get full relative path for sections and articles
# TODO rewrite links markup (bold/italic) after stripping
if len(m.group(1)) > 0: if len(m.group(1)) > 0:
repl: str = path_link.format(m.group(1), o.dest_path()) try:
repl = repl.format(
m.group(1).strip("{}"), o.dest_filename()
)
except KeyError as err:
print(repl, m.group(1), o.dest_filename())
raise err
else: else:
repl: str = path_link.format(o._title, o.dest_path()) repl = repl.format(o._title, o.dest_filename())
LOG.debug(f"Translating link to {repl}") LOG.debug(f"Translate link {m.group()} to {repl} in {self._title}")
text = text.replace(m.group(), repl) text = text.replace(m.group(), repl)
except DoesNotExist: except DoesNotExist:
LOG.warn(f"No object for link {m.group()} in {self._title}") LOG.warn(f"No object for link {m.group()} in {self._title}")
text = text.replace(m.group(), path_link.format("", "NOT FOUND"), 1) text = text.replace(m.group(), repl.format("", "NOT FOUND"), 1)
return text return text
# Modify this objects title to prevent filename conflicts # Modify this objects title to prevent filename conflicts
@ -451,13 +504,9 @@ class RedactionalObject(WritableObject):
) )
self._storage_title = self.convert_field(self._storage_title) self._storage_title = self.convert_field(self._storage_title)
self._title = self.translate_field(forced_lang, self._title) self._title = self.translate_field(forced_lang, self._title)
LOG.debug(f"Convert document links of `{self._title}` title") LOG.debug(f"Convert internal links of {self.lang} `{self._title}` title")
self._title = self.replace_links(self._title, DOCUMENT_LINK, Document) self._title = self.replace_links(self._title)
LOG.debug(f"Convert article links of `{self._title}` title") LOG.debug(f"Apply conversions to {self.lang} `{self._title}` title")
self._title = self.replace_links(self._title, ARTICLE_LINK, Article)
LOG.debug(f"Convert section links of `{self._title}` title")
self._title = self.replace_links(self._title, SECTION_LINK, Section)
LOG.debug(f"Apply conversions to `{self._title}` title")
self._title = self.convert_field(self._title) self._title = self.convert_field(self._title)
def convert_text(self, forced_lang: str) -> None: def convert_text(self, forced_lang: str) -> None:
@ -474,12 +523,9 @@ class RedactionalObject(WritableObject):
self._text = "" self._text = ""
return return
self._text = self.translate_field(forced_lang, self.texte.strip()) self._text = self.translate_field(forced_lang, self.texte.strip())
LOG.debug(f"Convert document links of `{self._title}`") LOG.debug(f"Convert internal links of {self.lang} `{self._title}` text")
self._text = self.replace_links(self._text, DOCUMENT_LINK, Document) self._text = self.replace_links(self._text)
LOG.debug(f"Convert article links of `{self._title}`") LOG.debug(f"Apply conversions to {self.lang} `{self._title}` text")
self._text = self.replace_links(self._text, ARTICLE_LINK, Article)
LOG.debug(f"Convert section links of `{self._title}`")
self._text = self.replace_links(self._text, SECTION_LINK, Section)
self._text = self.convert_field(self._text) self._text = self.convert_field(self._text)
def convert_extra(self) -> None: def convert_extra(self) -> None:
@ -495,10 +541,9 @@ class RedactionalObject(WritableObject):
LOG.debug(f"{type(self).__name__} {self._title} extra is empty") LOG.debug(f"{type(self).__name__} {self._title} extra is empty")
self._extra = "" self._extra = ""
return return
LOG.debug(f"Convert article links of `{self._title}`") LOG.debug(f"Convert internal links of {self.lang} `{self._title}` extra")
self._extra = self.replace_links(self.extra, ARTICLE_LINK, Article) self._extra = self.replace_links(self._extra)
LOG.debug(f"Convert section links of `{self._title}`") LOG.debug(f"Apply conversions to {self.lang} `{self._title}` extra")
self._extra = self.replace_links(self._extra, SECTION_LINK, Section)
self._extra = self.convert_field(self._extra) self._extra = self.convert_field(self._extra)
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@ -522,23 +567,20 @@ class RedactionalObject(WritableObject):
# Get the YAML frontmatter string # Get the YAML frontmatter string
def frontmatter(self, append: Optional[dict[str, Any]] = None) -> str: def frontmatter(self, append: Optional[dict[str, Any]] = None) -> str:
# LOG.debug(f"Write frontmatter of `{self._title}`") # LOG.debug(f"Write frontmatter of `{self._title}`")
meta: dict[str, Any] = ( meta: dict[str, Any] = {
{ "lang": self.lang,
"lang": self.lang, "translationKey": self.id_trad,
"translationKey": self.id_trad, "title": self._title,
"title": self._title, "publishDate": self.date,
"publishDate": self.date, "lastmod": self.maj,
"lastmod": self.maj, "draft": self._draft,
"draft": self._draft, "description": self._description,
"description": self._description, # Debugging
# Debugging "spip_id_secteur": self.id_secteur,
"spip_id_secteur": self.id_secteur, "spip_id": self._id,
"spip_id": self._id, }
} if self._url is not None:
| {"url": self._url} meta = meta | {"url": self._url}
if self._url is not None
else {}
)
if append is not None: if append is not None:
return dump(meta | append, allow_unicode=True) return dump(meta | append, allow_unicode=True)
else: else:

View File

@ -110,61 +110,44 @@ SPIP_MARKDOWN = (
) )
DOCUMENT_LINK = ( DOCUMENT_LINK = (
( # SPIP style embeds # SPIP style embeds
compile(r"<()(?:doc|document|emb|embed)([0-9]+)(?:\|(.*?))?>", S | I), compile(r"<()(?:doc|document|emb|embed)([0-9]+)(?:\|(.*?))?>", S | I),
r"[{}]({})", # SPIP style documents & embeds links
), compile(r"\[ *([^\]]*?) *-> *(?:doc|document|emb|embed)([0-9]+) *\]", S | I),
( # SPIP style documents & embeds links # Markdown style documents & embeds links
compile(r"\[ *([^\]]*?) *-> *(?:doc|document|emb|embed)([0-9]+) *\]", S | I), compile(r"\[(.*?)\]\((?:doc|document|emb|embed)([0-9]+)(?:\|(.*?))?\)", S | I),
r"[{}]({})", # SPIP style image links
), compile(r"\[ *([^\]]*?) *-> *(?:img|image)([0-9]+) *\]", S | I),
( # Markdown style documents & embeds links )
compile(r"\[(.*?)\]\((?:doc|document|emb|embed)([0-9]+)(?:\|(.*?))?\)", S | I),
r"[{}]({})", IMAGE_LINK = (
), # SPIP style images embeds
( # SPIP style images embeds compile(r"<()(?:img|image)([0-9]+)(?:\|(.*?))?>", S | I),
compile(r"<()(?:img|image)([0-9]+)(?:\|(.*?))?>", S | I), # Markdown style images links
r"![{}]({})", compile(r"!?\[(.*?)\]\((?:img|image)([0-9]+)(?:\|(.*?))?\)", S | I),
), )
( # SPIP style image links
compile(r"\[ *([^\]]*?) *-> *(?:img|image)([0-9]+) *\]", S | I),
r"[{}]({})",
),
( # Markdown style images links
compile(r"\[(.*?)\]\((?:img|image)([0-9]+)(?:\|(.*?))?\)", S | I),
r"![{}]({})",
),
) # Name and path can be further replaced with .format()
ARTICLE_LINK = ( ARTICLE_LINK = (
( # SPIP style article embeds # SPIP style article embeds
compile(r"<()(?:art|article)([0-9]+)(?:\|(.*?))?>", S | I), compile(r"<()(?:art|article)([0-9]+)(?:\|(.*?))?>", S | I),
r"[{}]({})", # SPIP style article links
), compile(r"\[ *([^\]]*?) *-> *(?:art|article)([0-9]+) *\]", S | I),
( # SPIP style article links # Markdown style internal links
compile(r"\[ *([^\]]*?) *-> *(?:art|article)([0-9]+) *\]", S | I), compile(r"\[(.*?)\]\((?:art|article)([0-9]+)(?:\|(.*?))?\)", S | I),
r"[{}]({})", )
),
( # Markdown style internal links
compile(r"\[(.*?)\]\((?:art|article)([0-9]+)(?:\|(.*?))?\)", S | I),
r"[{}]({})",
),
) # Name and path can be further replaced with .format()
SECTION_LINK = ( SECTION_LINK = (
( # SPIP style sections embeds # SPIP style sections embeds
compile(r"<()(?:rub|rubrique)([0-9]+)(?:\|(.*?))?>", S | I), compile(r"<()(?:rub|rubrique)([0-9]+)(?:\|(.*?))?>", S | I),
r"[{}]({})", # SPIP style sections links
), compile(r"\[ *([^\]]*?) *-> *(?:rub|rubrique)([0-9]+) *\]", S | I),
( # SPIP style sections links # Markdown style internal links
compile(r"\[ *([^\]]*?) *-> *(?:rub|rubrique)([0-9]+) *\]", S | I), compile(r"\[(.*?)\]\((?:rub|rubrique)([0-9]+)(?:\|(.*?))?\)", S | I),
r"[{}]({})", )
),
( # Markdown style internal links LINK_REPL = r"[{}]({})" # Name and path can be further replaced with .format()
compile(r"\[(.*?)\]\((?:rub|rubrique)([0-9]+)(?:\|(.*?))?\)", S | I), IMAGE_REPL = r"![{}]({})" # Name and path can be further replaced with .format()
r"[{}]({})",
),
) # Name and path can be further replaced with .format()
# Multi language block, to be further processed per lang # Multi language block, to be further processed per lang
MULTILANG_BLOCK = compile(r"<multi>(.+?)<\/multi>", S | I) MULTILANG_BLOCK = compile(r"<multi>(.+?)<\/multi>", S | I)
@ -319,7 +302,10 @@ SPECIAL_OUTPUT = (
compile(r"^([0-9]+?\.)(?= )"), # Counter compile(r"^([0-9]+?\.)(?= )"), # Counter
compile(r"(?<= )(->)(?= )"), # Arrow compile(r"(?<= )(->)(?= )"), # Arrow
compile(r"(?<=^Exporting )([0-9]+?)(?= )"), # Total compile(r"(?<=^Exporting )([0-9]+?)(?= )"), # Total
) + tuple(compile(r"( " + language + r" )") for language in CFG.export_languages) ) + tuple(
compile(r"(?<=level [0-9] )(" + language + r" )")
for language in CFG.export_languages
)
# Warning elements in terminal output to highlight # Warning elements in terminal output to highlight
WARNING_OUTPUT = ( WARNING_OUTPUT = (