diff --git a/tilavarauspalvelu/migrations/0057_migrate_instructions_to_html.py b/tilavarauspalvelu/migrations/0057_migrate_instructions_to_html.py index e8b6bae4a..31a41bf88 100644 --- a/tilavarauspalvelu/migrations/0057_migrate_instructions_to_html.py +++ b/tilavarauspalvelu/migrations/0057_migrate_instructions_to_html.py @@ -15,7 +15,9 @@ def htmlize(text: str | None) -> str: if text.startswith("<"): return text - text = re.sub(r"(.{1,})", r"<p>\1</p>", text) + # Convert linebreaks to paragraphs + pattern = r"(.{1,})" # Match any characters, with a minimum length of 1 (Essentially get rows with content) + text = re.sub(pattern, r"<p>\1</p>", text) return text.replace("\n", "") # Remove linebreaks diff --git a/utils/utils.py b/utils/utils.py index de03cb0ce..2c58c1c27 100644 --- a/utils/utils.py +++ b/utils/utils.py @@ -189,14 +189,34 @@ def as_p_tags(texts: Iterable[str]) -> str: def convert_html_to_text(html_text: str) -> str: text = html2text(html_text, bodywidth=0) - # Link text and url are the same + # Link text and url are the same: # Remove angle-brackets from links `<url>` -> `url` # If there is a dot after the link, add a space between the link and the dot. - text = re.sub(r"<(https?://[^>]+)>(\.?)", r"\1 \2", text) - # Link text and url are different + # fmt: off + pattern = ( + r"<" # begins with opening bracket + r"(?P<link>(https?://)?[^>]+)" # link, with optional protocol + r">" # followed by closing bracket + r"(?P<dot>\.?)" # with optional dot + ) + # fmt: on + text = re.sub(pattern, r"\g<link> \g<dot>", text) + + # Link text and url are different: # Replace markdown-style links `[text](url)` with `text <url>` - text = re.sub(r"\[([^\]]+)\]\(((http|https)?://[^\)]+)\)", r"\1 <\2>", text) + # fmt: off + pattern = ( + r"\[" # begins with "[" + r"(?P<text>[^\]]+)" # any text that is not "]" + r"\]" # followed by "]" + r"\(" # followed by "(" + r"(?P<link>(https?://)?[^\)]+)" # any link, with optional protocol + r"\)" # followed by ")" + ) + # fmt: on + + text = re.sub(pattern, r"\g<text> <\g<link>>", text) # Remove any spaces between newline and the last newline, which is added by html2text return text.replace(" \n", "\n").removesuffix("\n")