From 2a6956244082445346abe32f516eb7aa6d28f50c Mon Sep 17 00:00:00 2001 From: Daniel Schwarz Date: Mon, 20 Nov 2023 20:39:43 -0500 Subject: [PATCH 01/10] added html2text dependency (mandatory, for now) --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index fdeb67b1..ff10c5a3 100644 --- a/setup.py +++ b/setup.py @@ -39,6 +39,7 @@ "wcwidth>=0.1.7", "urwid>=2.0.0,<3.0", "tomlkit>=0.10.0,<1.0" + "html2text>=2020.1.16" ], extras_require={ # Required to display rich text in the TUI From 3b5d69d308495168a40037bb2ab2201fbe1aef95 Mon Sep 17 00:00:00 2001 From: Daniel Schwarz Date: Mon, 20 Nov 2023 20:40:12 -0500 Subject: [PATCH 02/10] Display status messages in Markdown format --- toot/output.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/toot/output.py b/toot/output.py index bf5ee875..7bdc3332 100644 --- a/toot/output.py +++ b/toot/output.py @@ -2,10 +2,11 @@ import re import sys import textwrap +import html2text from functools import lru_cache from toot import settings -from toot.utils import get_text, html_to_paragraphs +from toot.utils import get_text from toot.entities import Account, Instance, Notification, Poll, Status from toot.wcstring import wc_wrap from typing import List @@ -174,7 +175,6 @@ def print_account(account: Account): print_out(f"@{account.acct} {account.display_name}") if account.note: - print_out("") print_html(account.note) since = account.created_at.strftime('%Y-%m-%d') @@ -299,7 +299,6 @@ def print_status(status: Status, width: int = 80): f"{time}", ) - print_out("") print_html(status.content, width) if status.media_attachments: @@ -322,14 +321,20 @@ def print_status(status: Status, width: int = 80): def print_html(text, width=80): - first = True - for paragraph in html_to_paragraphs(text): - if not first: - print_out("") - for line in paragraph: - for subline in wc_wrap(line, width): - print_out(highlight_hashtags(subline)) - first = False + h2t = 
html2text.HTML2Text() + + h2t.body_width = width + h2t.single_line_break = True + h2t.ignore_links = True + h2t.wrap_links = True + h2t.wrap_list_items = True + h2t.wrap_tables = True + h2t.unicode_snob = True + h2t.ul_item_mark = "\N{bullet}" + markdown = h2t.handle(text).strip() + + print_out("") + print_out(highlight_hashtags(markdown)) def print_poll(poll: Poll): From d90aee2de634fb7f60823771d1c27fcbd7bde7c5 Mon Sep 17 00:00:00 2001 From: Daniel Schwarz Date: Mon, 20 Nov 2023 20:41:07 -0500 Subject: [PATCH 03/10] Test for rendering HTML status message in Markdown --- tests/test_console.py | 206 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 204 insertions(+), 2 deletions(-) diff --git a/tests/test_console.py b/tests/test_console.py index 3b7d5f26..5eeb1717 100644 --- a/tests/test_console.py +++ b/tests/test_console.py @@ -152,6 +152,210 @@ def test_timeline(mock_get, monkeypatch, capsys): assert err == "" +@mock.patch('toot.http.get') +def test_timeline_html_content(mock_get, monkeypatch, capsys): + mock_get.return_value = MockResponse([{ + 'id': '111111111111111111', + 'account': { + 'display_name': 'Frank Zappa 🎸', + 'acct': 'fz' + }, + 'created_at': '2017-04-12T15:53:18.174Z', + 'content': "

HTML Render Test

emphasized
underlined
bold
bold and italic
strikethrough
regular text

Code block:

10 PRINT \"HELLO WORLD\"
20 GOTO 10

Something blockquoted here. The indentation is maintained as the text line wraps.

  1. List item
    • Nested item
    • Another nested
  2. Another list item.
    1. Something else nested
    2. And a last nested

Blockquote

  1. List in BQ
  2. List item 2 in BQ

#hashtag #test
https://a.com text after link

", + 'reblog': None, + 'in_reply_to_id': None, + 'media_attachments': [], + }]) + + console.run_command(app, user, 'timeline', ['--once']) + + mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10}) + + out, err = capsys.readouterr() + lines = out.split("\n") + reference = [ + "────────────────────────────────────────────────────────────────────────────────────────────────────", + "Frank Zappa 🎸 @fz 2017-04-12 15:53 UTC", + "", + "## HTML Render Test", + "", + " _emphasized_ ", + " _underlined_ ", + " **bold** ", + " ** _bold and italic_** ", + " ~~strikethrough~~ ", + "regular text", + "", + "Code block:", + "", + " ", + " 10 PRINT \"HELLO WORLD\" ", + " 20 GOTO 10 ", + " ", + "> Something blockquoted here. The indentation is maintained as the text line wraps.", + " 1. List item", + " • Nested item", + " • Another nested ", + " 2. Another list item. ", + " 1. Something else nested", + " 2. And a last nested", + "", + "> Blockquote", + "> 1. List in BQ", + "> 2. List item 2 in BQ", + ">", + "", + "#hashtag #test ", + "https://a.com text after link", + "", + "ID 111111111111111111 ", + "────────────────────────────────────────────────────────────────────────────────────────────────────", + "", + ] + + assert len(lines) == len(reference) + for index, line in enumerate(lines): + assert line == reference[index], f"Line #{index}: Expected:\n{reference[index]}\nGot:\n{line}" + + assert err == "" + + +@mock.patch('toot.http.get') +def test_timeline_html_content(mock_get, monkeypatch, capsys): + mock_get.return_value = MockResponse([{ + 'id': '111111111111111111', + 'account': { + 'display_name': 'Frank Zappa 🎸', + 'acct': 'fz' + }, + 'created_at': '2017-04-12T15:53:18.174Z', + 'content': "

HTML Render Test

emphasized
underlined
bold
bold and italic
strikethrough
regular text

Code block:

10 PRINT \"HELLO WORLD\"
20 GOTO 10

Something blockquoted here. The indentation is maintained as the text line wraps.

  1. List item
    • Nested item
    • Another nested
  2. Another list item.
    1. Something else nested
    2. And a last nested

Blockquote

  1. List in BQ
  2. List item 2 in BQ

#hashtag #test
https://a.com text after link

", + 'reblog': None, + 'in_reply_to_id': None, + 'media_attachments': [], + }]) + + console.run_command(app, user, 'timeline', ['--once']) + + mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10}) + + out, err = capsys.readouterr() + lines = out.split("\n") + reference = [ + "────────────────────────────────────────────────────────────────────────────────────────────────────", + "Frank Zappa 🎸 @fz 2017-04-12 15:53 UTC", + "", + "## HTML Render Test", + "", + " _emphasized_ ", + " _underlined_ ", + " **bold** ", + " ** _bold and italic_** ", + " ~~strikethrough~~ ", + "regular text", + "", + "Code block:", + "", + " ", + " 10 PRINT \"HELLO WORLD\" ", + " 20 GOTO 10 ", + " ", + "> Something blockquoted here. The indentation is maintained as the text line wraps.", + " 1. List item", + " • Nested item", + " • Another nested ", + " 2. Another list item. ", + " 1. Something else nested", + " 2. And a last nested", + "", + "> Blockquote", + "> 1. List in BQ", + "> 2. List item 2 in BQ", + ">", + "", + "#hashtag #test ", + "https://a.com text after link", + "", + "ID 111111111111111111 ", + "────────────────────────────────────────────────────────────────────────────────────────────────────", + "", + ] + + assert len(lines) == len(reference) + for index, line in enumerate(lines): + assert line == reference[index], f"Line #{index}: Expected:\n{reference[index]}\nGot:\n{line}" + + assert err == "" + + +@mock.patch('toot.http.get') +def test_timeline_html_content(mock_get, monkeypatch, capsys): + mock_get.return_value = MockResponse([{ + 'id': '111111111111111111', + 'account': { + 'display_name': 'Frank Zappa 🎸', + 'acct': 'fz' + }, + 'created_at': '2017-04-12T15:53:18.174Z', + 'content': "

HTML Render Test

emphasized
underlined
bold
bold and italic
strikethrough
regular text

Code block:

10 PRINT \"HELLO WORLD\"
20 GOTO 10

Something blockquoted here. The indentation is maintained as the text line wraps.

  1. List item
    • Nested item
    • Another nested
  2. Another list item.
    1. Something else nested
    2. And a last nested

Blockquote

  1. List in BQ
  2. List item 2 in BQ

#hashtag #test
https://a.com text after link

", + 'reblog': None, + 'in_reply_to_id': None, + 'media_attachments': [], + }]) + + console.run_command(app, user, 'timeline', ['--once']) + + mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10}) + + out, err = capsys.readouterr() + lines = out.split("\n") + reference = [ + "────────────────────────────────────────────────────────────────────────────────────────────────────", + "Frank Zappa 🎸 @fz 2017-04-12 15:53 UTC", + "", + "## HTML Render Test", + "", + " _emphasized_ ", + " _underlined_ ", + " **bold** ", + " ** _bold and italic_** ", + " ~~strikethrough~~ ", + "regular text", + "", + "Code block:", + "", + " ", + " 10 PRINT \"HELLO WORLD\" ", + " 20 GOTO 10 ", + " ", + "> Something blockquoted here. The indentation is maintained as the text line wraps.", + " 1. List item", + " • Nested item", + " • Another nested ", + " 2. Another list item. ", + " 1. Something else nested", + " 2. And a last nested", + "", + "> Blockquote", + "> 1. List in BQ", + "> 2. 
List item 2 in BQ", + ">", + "", + "#hashtag #test ", + "https://a.com text after link", + "", + "ID 111111111111111111 ", + "────────────────────────────────────────────────────────────────────────────────────────────────────", + "", + ] + + assert len(lines) == len(reference) + for index, line in enumerate(lines): + assert line == reference[index], f"Line #{index}: Expected:\n{reference[index]}\nGot:\n{line}" + + assert err == "" + + @mock.patch('toot.http.get') def test_timeline_with_re(mock_get, monkeypatch, capsys): mock_get.return_value = MockResponse([{ @@ -588,8 +792,6 @@ def test_notifications(mock_get, capsys): "────────────────────────────────────────────────────────────────────────────────────────────────────", "", ]) - - @mock.patch('toot.http.get') def test_notifications_empty(mock_get, capsys): mock_get.return_value = MockResponse([]) From a2fd01c8ee34c66d83f546f5a075faf539f6d550 Mon Sep 17 00:00:00 2001 From: Daniel Schwarz Date: Mon, 20 Nov 2023 20:41:50 -0500 Subject: [PATCH 04/10] Render status messages copied to clipboard in Markdown --- toot/tui/app.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/toot/tui/app.py b/toot/tui/app.py index 349322c9..d90428d9 100644 --- a/toot/tui/app.py +++ b/toot/tui/app.py @@ -1,12 +1,14 @@ import logging import subprocess import urwid +import html2text from concurrent.futures import ThreadPoolExecutor from toot import api, config, __version__, settings from toot.console import get_default_visibility from toot.exceptions import ApiError +from toot.utils.datetime import parse_datetime from .compose import StatusComposer from .constants import PALETTE @@ -654,9 +656,26 @@ def _done(loop): return self.run_in_thread(_delete, done_callback=_done) def copy_status(self, status): - # TODO: copy a better version of status content - # including URLs - copy_to_clipboard(self.screen, status.original.data["content"]) + h2t = html2text.HTML2Text() + h2t.body_width = 0 # nowrap + 
h2t.single_line_break = True + h2t.ignore_links = True + h2t.unicode_snob = True + h2t.ul_item_mark = "\N{bullet}" + + time = parse_datetime(status.original.data['created_at']) + time = time.strftime('%Y-%m-%d %H:%M %Z') + + text_status = (f"{status.original.data['url']}\n\n" + + (status.original.author.display_name or "") + + "\n" + + (status.original.author.account or "") + + "\n\n" + + h2t.handle(status.original.data["content"]).strip() + + "\n\n" + + f"Created at: {time}") + + copy_to_clipboard(self.screen, text_status) self.footer.set_message(f"Status {status.original.id} copied") # --- Overlay handling ----------------------------------------------------- From eeb599e5ed573ee43326069070450f6189985715 Mon Sep 17 00:00:00 2001 From: Daniel Schwarz Date: Mon, 20 Nov 2023 20:42:17 -0500 Subject: [PATCH 05/10] Render status messages in the TUI using Markdown (This is a fallback from rendering using urwidgets, if urwidgets library is not available.) --- toot/tui/richtext/__init__.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/toot/tui/richtext/__init__.py b/toot/tui/richtext/__init__.py index 07e31c8e..f476c8b9 100644 --- a/toot/tui/richtext/__init__.py +++ b/toot/tui/richtext/__init__.py @@ -1,7 +1,7 @@ import urwid +import html2text from toot.tui.utils import highlight_hashtags -from toot.utils import format_content from typing import List try: @@ -10,9 +10,19 @@ # Fallback if urwidgets are not available def html_to_widgets(html: str) -> List[urwid.Widget]: return [ - urwid.Text(highlight_hashtags(line)) - for line in format_content(html) + urwid.Text(highlight_hashtags(_format_markdown(html))) ] def url_to_widget(url: str): return urwid.Text(("link", url)) + + def _format_markdown(html) -> str: + h2t = html2text.HTML2Text() + h2t.single_line_break = True + h2t.ignore_links = True + h2t.wrap_links = False + h2t.wrap_list_items = False + h2t.wrap_tables = False + h2t.unicode_snob = True + h2t.ul_item_mark = "\N{bullet}" + 
return h2t.handle(html).strip() From 60a1c749215a681d390c6e779470f918c9a9a27f Mon Sep 17 00:00:00 2001 From: Daniel Schwarz Date: Mon, 20 Nov 2023 20:46:21 -0500 Subject: [PATCH 06/10] Removed highlight_hashtags from Markdown rendering in TUI It's not working right for multiline statuses that start with a # character --- toot/tui/richtext/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/toot/tui/richtext/__init__.py b/toot/tui/richtext/__init__.py index f476c8b9..2793493f 100644 --- a/toot/tui/richtext/__init__.py +++ b/toot/tui/richtext/__init__.py @@ -1,7 +1,6 @@ import urwid import html2text -from toot.tui.utils import highlight_hashtags from typing import List try: @@ -10,7 +9,7 @@ # Fallback if urwidgets are not available def html_to_widgets(html: str) -> List[urwid.Widget]: return [ - urwid.Text(highlight_hashtags(_format_markdown(html))) + urwid.Text(_format_markdown(html)) ] def url_to_widget(url: str): From 7443d3e0b5ac25992da44235f24b252572ce7858 Mon Sep 17 00:00:00 2001 From: Daniel Schwarz Date: Mon, 20 Nov 2023 20:59:56 -0500 Subject: [PATCH 07/10] add missing comma to install_requires list --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ff10c5a3..5946faf1 100644 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ "beautifulsoup4>=4.5.0,<5.0", "wcwidth>=0.1.7", "urwid>=2.0.0,<3.0", - "tomlkit>=0.10.0,<1.0" + "tomlkit>=0.10.0,<1.0", "html2text>=2020.1.16" ], extras_require={ From 8cb294f3c866e276edb89b048718b7e5673e097f Mon Sep 17 00:00:00 2001 From: Daniel Schwarz Date: Thu, 23 Nov 2023 11:10:02 -0500 Subject: [PATCH 08/10] Use Pandoc to render markdown, fallback to plaintext Also used for markdown rendering in console and copy-to-clipboard --- setup.py | 7 +- tests/test_console.py | 204 ---------------------------------- toot/output.py | 17 +-- toot/richtext/__init__.py | 25 +++++ toot/richtext/markdown.py | 11 ++ toot/tui/app.py | 12 +- toot/tui/richtext/__init__.py | 
35 +++--- toot/tui/richtext/markdown.py | 21 ++++ 8 files changed, 85 insertions(+), 247 deletions(-) create mode 100644 toot/richtext/__init__.py create mode 100644 toot/richtext/markdown.py create mode 100644 toot/tui/richtext/markdown.py diff --git a/setup.py b/setup.py index 5946faf1..7ed6c153 100644 --- a/setup.py +++ b/setup.py @@ -39,12 +39,15 @@ "wcwidth>=0.1.7", "urwid>=2.0.0,<3.0", "tomlkit>=0.10.0,<1.0", - "html2text>=2020.1.16" ], extras_require={ # Required to display rich text in the TUI "richtext": [ - "urwidgets>=0.1,<0.2" + "urwidgets>=0.1,<0.2", + ], + "markdown": [ + "pypandoc>=1.12.0,<2.0", + "pypandoc-binary>=1.12.0,<2.0", ], "dev": [ "coverage", diff --git a/tests/test_console.py b/tests/test_console.py index 5eeb1717..1d321df0 100644 --- a/tests/test_console.py +++ b/tests/test_console.py @@ -152,210 +152,6 @@ def test_timeline(mock_get, monkeypatch, capsys): assert err == "" -@mock.patch('toot.http.get') -def test_timeline_html_content(mock_get, monkeypatch, capsys): - mock_get.return_value = MockResponse([{ - 'id': '111111111111111111', - 'account': { - 'display_name': 'Frank Zappa 🎸', - 'acct': 'fz' - }, - 'created_at': '2017-04-12T15:53:18.174Z', - 'content': "

HTML Render Test

emphasized
underlined
bold
bold and italic
strikethrough
regular text

Code block:

10 PRINT \"HELLO WORLD\"
20 GOTO 10

Something blockquoted here. The indentation is maintained as the text line wraps.

  1. List item
    • Nested item
    • Another nested
  2. Another list item.
    1. Something else nested
    2. And a last nested

Blockquote

  1. List in BQ
  2. List item 2 in BQ

#hashtag #test
https://a.com text after link

", - 'reblog': None, - 'in_reply_to_id': None, - 'media_attachments': [], - }]) - - console.run_command(app, user, 'timeline', ['--once']) - - mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10}) - - out, err = capsys.readouterr() - lines = out.split("\n") - reference = [ - "────────────────────────────────────────────────────────────────────────────────────────────────────", - "Frank Zappa 🎸 @fz 2017-04-12 15:53 UTC", - "", - "## HTML Render Test", - "", - " _emphasized_ ", - " _underlined_ ", - " **bold** ", - " ** _bold and italic_** ", - " ~~strikethrough~~ ", - "regular text", - "", - "Code block:", - "", - " ", - " 10 PRINT \"HELLO WORLD\" ", - " 20 GOTO 10 ", - " ", - "> Something blockquoted here. The indentation is maintained as the text line wraps.", - " 1. List item", - " • Nested item", - " • Another nested ", - " 2. Another list item. ", - " 1. Something else nested", - " 2. And a last nested", - "", - "> Blockquote", - "> 1. List in BQ", - "> 2. List item 2 in BQ", - ">", - "", - "#hashtag #test ", - "https://a.com text after link", - "", - "ID 111111111111111111 ", - "────────────────────────────────────────────────────────────────────────────────────────────────────", - "", - ] - - assert len(lines) == len(reference) - for index, line in enumerate(lines): - assert line == reference[index], f"Line #{index}: Expected:\n{reference[index]}\nGot:\n{line}" - - assert err == "" - - -@mock.patch('toot.http.get') -def test_timeline_html_content(mock_get, monkeypatch, capsys): - mock_get.return_value = MockResponse([{ - 'id': '111111111111111111', - 'account': { - 'display_name': 'Frank Zappa 🎸', - 'acct': 'fz' - }, - 'created_at': '2017-04-12T15:53:18.174Z', - 'content': "

HTML Render Test

emphasized
underlined
bold
bold and italic
strikethrough
regular text

Code block:

10 PRINT \"HELLO WORLD\"
20 GOTO 10

Something blockquoted here. The indentation is maintained as the text line wraps.

  1. List item
    • Nested item
    • Another nested
  2. Another list item.
    1. Something else nested
    2. And a last nested

Blockquote

  1. List in BQ
  2. List item 2 in BQ

#hashtag #test
https://a.com text after link

", - 'reblog': None, - 'in_reply_to_id': None, - 'media_attachments': [], - }]) - - console.run_command(app, user, 'timeline', ['--once']) - - mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10}) - - out, err = capsys.readouterr() - lines = out.split("\n") - reference = [ - "────────────────────────────────────────────────────────────────────────────────────────────────────", - "Frank Zappa 🎸 @fz 2017-04-12 15:53 UTC", - "", - "## HTML Render Test", - "", - " _emphasized_ ", - " _underlined_ ", - " **bold** ", - " ** _bold and italic_** ", - " ~~strikethrough~~ ", - "regular text", - "", - "Code block:", - "", - " ", - " 10 PRINT \"HELLO WORLD\" ", - " 20 GOTO 10 ", - " ", - "> Something blockquoted here. The indentation is maintained as the text line wraps.", - " 1. List item", - " • Nested item", - " • Another nested ", - " 2. Another list item. ", - " 1. Something else nested", - " 2. And a last nested", - "", - "> Blockquote", - "> 1. List in BQ", - "> 2. List item 2 in BQ", - ">", - "", - "#hashtag #test ", - "https://a.com text after link", - "", - "ID 111111111111111111 ", - "────────────────────────────────────────────────────────────────────────────────────────────────────", - "", - ] - - assert len(lines) == len(reference) - for index, line in enumerate(lines): - assert line == reference[index], f"Line #{index}: Expected:\n{reference[index]}\nGot:\n{line}" - - assert err == "" - - -@mock.patch('toot.http.get') -def test_timeline_html_content(mock_get, monkeypatch, capsys): - mock_get.return_value = MockResponse([{ - 'id': '111111111111111111', - 'account': { - 'display_name': 'Frank Zappa 🎸', - 'acct': 'fz' - }, - 'created_at': '2017-04-12T15:53:18.174Z', - 'content': "

HTML Render Test

emphasized
underlined
bold
bold and italic
strikethrough
regular text

Code block:

10 PRINT \"HELLO WORLD\"
20 GOTO 10

Something blockquoted here. The indentation is maintained as the text line wraps.

  1. List item
    • Nested item
    • Another nested
  2. Another list item.
    1. Something else nested
    2. And a last nested

Blockquote

  1. List in BQ
  2. List item 2 in BQ

#hashtag #test
https://a.com text after link

", - 'reblog': None, - 'in_reply_to_id': None, - 'media_attachments': [], - }]) - - console.run_command(app, user, 'timeline', ['--once']) - - mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10}) - - out, err = capsys.readouterr() - lines = out.split("\n") - reference = [ - "────────────────────────────────────────────────────────────────────────────────────────────────────", - "Frank Zappa 🎸 @fz 2017-04-12 15:53 UTC", - "", - "## HTML Render Test", - "", - " _emphasized_ ", - " _underlined_ ", - " **bold** ", - " ** _bold and italic_** ", - " ~~strikethrough~~ ", - "regular text", - "", - "Code block:", - "", - " ", - " 10 PRINT \"HELLO WORLD\" ", - " 20 GOTO 10 ", - " ", - "> Something blockquoted here. The indentation is maintained as the text line wraps.", - " 1. List item", - " • Nested item", - " • Another nested ", - " 2. Another list item. ", - " 1. Something else nested", - " 2. And a last nested", - "", - "> Blockquote", - "> 1. List in BQ", - "> 2. 
List item 2 in BQ", - ">", - "", - "#hashtag #test ", - "https://a.com text after link", - "", - "ID 111111111111111111 ", - "────────────────────────────────────────────────────────────────────────────────────────────────────", - "", - ] - - assert len(lines) == len(reference) - for index, line in enumerate(lines): - assert line == reference[index], f"Line #{index}: Expected:\n{reference[index]}\nGot:\n{line}" - - assert err == "" - - @mock.patch('toot.http.get') def test_timeline_with_re(mock_get, monkeypatch, capsys): mock_get.return_value = MockResponse([{ diff --git a/toot/output.py b/toot/output.py index 7bdc3332..9bf7d919 100644 --- a/toot/output.py +++ b/toot/output.py @@ -2,11 +2,11 @@ import re import sys import textwrap -import html2text from functools import lru_cache from toot import settings from toot.utils import get_text +from toot.richtext import html_to_text from toot.entities import Account, Instance, Notification, Poll, Status from toot.wcstring import wc_wrap from typing import List @@ -321,20 +321,9 @@ def print_status(status: Status, width: int = 80): def print_html(text, width=80): - h2t = html2text.HTML2Text() - - h2t.body_width = width - h2t.single_line_break = True - h2t.ignore_links = True - h2t.wrap_links = True - h2t.wrap_list_items = True - h2t.wrap_tables = True - h2t.unicode_snob = True - h2t.ul_item_mark = "\N{bullet}" - markdown = h2t.handle(text).strip() - + markdown = "\n".join(html_to_text(text, columns=width, highlight_tags=False)) print_out("") - print_out(highlight_hashtags(markdown)) + print_out(markdown) def print_poll(poll: Poll): diff --git a/toot/richtext/__init__.py b/toot/richtext/__init__.py new file mode 100644 index 00000000..9888a5de --- /dev/null +++ b/toot/richtext/__init__.py @@ -0,0 +1,25 @@ +from toot.tui.utils import highlight_hashtags +from toot.utils import html_to_paragraphs +from toot.wcstring import wc_wrap +from typing import List + +try: + # first preference, render markup with pypandoc + from 
.markdown import html_to_text + +except ImportError: + # Fallback to render in plaintext + def html_to_text(html: str, columns=80, highlight_tags=False) -> List: + output = [] + first = True + for paragraph in html_to_paragraphs(html): + if not first: + output.append("") + for line in paragraph: + for subline in wc_wrap(line, columns): + if highlight_tags: + output.append(highlight_hashtags(subline)) + else: + output.append(subline) + first = False + return output diff --git a/toot/richtext/markdown.py b/toot/richtext/markdown.py new file mode 100644 index 00000000..a3ea03c1 --- /dev/null +++ b/toot/richtext/markdown.py @@ -0,0 +1,11 @@ +from pypandoc import convert_text +from typing import List + + +def html_to_text(html: str, columns=80, highlight_tags=False) -> List: + return [convert_text( + html, + format="html", + to="gfm-raw_html", + extra_args=["--wrap=auto", f"--columns={columns}"], + )] diff --git a/toot/tui/app.py b/toot/tui/app.py index d90428d9..838b7b37 100644 --- a/toot/tui/app.py +++ b/toot/tui/app.py @@ -1,13 +1,13 @@ import logging import subprocess import urwid -import html2text from concurrent.futures import ThreadPoolExecutor from toot import api, config, __version__, settings from toot.console import get_default_visibility from toot.exceptions import ApiError +from toot.richtext import html_to_text from toot.utils.datetime import parse_datetime from .compose import StatusComposer @@ -656,12 +656,8 @@ def _done(loop): return self.run_in_thread(_delete, done_callback=_done) def copy_status(self, status): - h2t = html2text.HTML2Text() - h2t.body_width = 0 # nowrap - h2t.single_line_break = True - h2t.ignore_links = True - h2t.unicode_snob = True - h2t.ul_item_mark = "\N{bullet}" + + markdown = "\n".join(html_to_text(status.original.data["content"], columns=1024, highlight_tags=False)) time = parse_datetime(status.original.data['created_at']) time = time.strftime('%Y-%m-%d %H:%M %Z') @@ -671,7 +667,7 @@ def copy_status(self, status): + "\n" + 
(status.original.author.account or "") + "\n\n" - + h2t.handle(status.original.data["content"]).strip() + + markdown + "\n\n" + f"Created at: {time}") diff --git a/toot/tui/richtext/__init__.py b/toot/tui/richtext/__init__.py index 2793493f..e0e43dcf 100644 --- a/toot/tui/richtext/__init__.py +++ b/toot/tui/richtext/__init__.py @@ -1,27 +1,24 @@ import urwid -import html2text - +from toot.tui.utils import highlight_hashtags +from toot.utils import format_content from typing import List try: + # our first preference is to render using urwidgets from .richtext import html_to_widgets, url_to_widget + except ImportError: - # Fallback if urwidgets are not available - def html_to_widgets(html: str) -> List[urwid.Widget]: - return [ - urwid.Text(_format_markdown(html)) - ] + try: + # second preference, render markup with pypandoc + from .markdown import html_to_widgets, url_to_widget + + except ImportError: + # Fallback to render in plaintext - def url_to_widget(url: str): - return urwid.Text(("link", url)) + def url_to_widget(url: str): + return urwid.Text(("link", url)) - def _format_markdown(html) -> str: - h2t = html2text.HTML2Text() - h2t.single_line_break = True - h2t.ignore_links = True - h2t.wrap_links = False - h2t.wrap_list_items = False - h2t.wrap_tables = False - h2t.unicode_snob = True - h2t.ul_item_mark = "\N{bullet}" - return h2t.handle(html).strip() + def html_to_widgets(html: str) -> List[urwid.Widget]: + return [ + urwid.Text(highlight_hashtags(line)) for line in format_content(html) + ] diff --git a/toot/tui/richtext/markdown.py b/toot/tui/richtext/markdown.py new file mode 100644 index 00000000..dcc5e7a8 --- /dev/null +++ b/toot/tui/richtext/markdown.py @@ -0,0 +1,21 @@ +import urwid +from pypandoc import convert_text + +from typing import List + + +def url_to_widget(url: str): + return urwid.Text(("link", url)) + + +def html_to_widgets(html: str) -> List[urwid.Widget]: + return [ + urwid.Text( + convert_text( + html, + format="html", + 
to="gfm-raw_html", + extra_args=["--wrap=none"], + ) + ) + ] From 1e8c5f1170a5700ea321b743341b0581c4968c58 Mon Sep 17 00:00:00 2001 From: Daniel Schwarz Date: Mon, 27 Nov 2023 20:22:37 -0500 Subject: [PATCH 09/10] Added --plaintext option to command line tools Now we default to markdown when displaying status messages; the --plaintext option reverts to previous behavior. This switch is used in test_console.py so existing tests will pass; we can add tests in future without --plaintext to test the markdown rendering. --- .github/workflows/test.yml | 2 +- tests/test_console.py | 8 ++++---- toot/commands.py | 9 +++++---- toot/console.py | 14 ++++++++++++-- toot/output.py | 20 ++++++++++---------- toot/richtext/__init__.py | 24 +++++++----------------- toot/richtext/markdown.py | 20 +++++++++++++------- toot/richtext/plaintext.py | 20 ++++++++++++++++++++ 8 files changed, 72 insertions(+), 45 deletions(-) create mode 100644 toot/richtext/plaintext.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5417a2f0..520ee76e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,7 +18,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -e ".[test,richtext]" + pip install -e ".[test,richtext,markdown]" - name: Run tests run: | pytest diff --git a/tests/test_console.py b/tests/test_console.py index 1d321df0..eb004d92 100644 --- a/tests/test_console.py +++ b/tests/test_console.py @@ -132,7 +132,7 @@ def test_timeline(mock_get, monkeypatch, capsys): 'media_attachments': [], }]) - console.run_command(app, user, 'timeline', ['--once']) + console.run_command(app, user, 'timeline', ['--once', '--plaintext']) mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10}) @@ -175,7 +175,7 @@ def test_timeline_with_re(mock_get, monkeypatch, capsys): 'media_attachments': [], }]) - console.run_command(app, user, 'timeline', ['--once']) + console.run_command(app, user, 
'timeline', ['--once', '--plaintext']) mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10}) @@ -237,7 +237,7 @@ def test_thread(mock_get, monkeypatch, capsys): }), ] - console.run_command(app, user, 'thread', ['111111111111111111']) + console.run_command(app, user, 'thread', ['--plaintext', '111111111111111111']) calls = [ mock.call(app, user, '/api/v1/statuses/111111111111111111'), @@ -553,7 +553,7 @@ def test_notifications(mock_get, capsys): }, }]) - console.run_command(app, user, 'notifications', []) + console.run_command(app, user, 'notifications', ['--plaintext']) mock_get.assert_called_once_with(app, user, '/api/v1/notifications', {'exclude_types[]': [], 'limit': 20}) diff --git a/toot/commands.py b/toot/commands.py index e16d8f15..3e7ee4d3 100644 --- a/toot/commands.py +++ b/toot/commands.py @@ -58,7 +58,8 @@ def timeline(app, user, args, generator=None): items = reversed(items) statuses = [from_dict(Status, item) for item in items] - print_timeline(statuses) + + print_timeline(statuses, render_mode="plaintext" if args.plaintext else "markdown") if args.once or not sys.stdout.isatty(): break @@ -71,7 +72,7 @@ def timeline(app, user, args, generator=None): def status(app, user, args): status = api.single_status(app, user, args.status_id) status = from_dict(Status, status) - print_status(status) + print_status(status, render_mode="plaintext" if args.plaintext else "markdown") def thread(app, user, args): @@ -87,7 +88,7 @@ def thread(app, user, args): thread.append(item) statuses = [from_dict(Status, s) for s in thread] - print_timeline(statuses) + print_timeline(statuses, render_mode="plaintext" if args.plaintext else "markdown") def post(app, user, args): @@ -572,7 +573,7 @@ def notifications(app, user, args): notifications = reversed(notifications) notifications = [from_dict(Notification, n) for n in notifications] - print_notifications(notifications) + print_notifications(notifications, render_mode="plaintext" if 
args.plaintext else "markdown") def tui(app, user, args): diff --git a/toot/console.py b/toot/console.py index 9515d68a..86e08d88 100644 --- a/toot/console.py +++ b/toot/console.py @@ -235,7 +235,13 @@ def editor(value): json_arg = (["--json"], { "action": "store_true", "default": False, - "help": "print json instead of plaintext", + "help": "print json instead of standard text", +}) + +plaintext_arg = (["-pt", "--plaintext"], { + "action": "store_true", + "help": "Render status messages in plaintext, not markdown", + "default": False, }) # Arguments for selecting a timeline (see `toot.commands.get_timeline_generator`) @@ -284,6 +290,7 @@ def editor(value): "default": False, "help": "Only show the first toots, do not prompt to continue.", }), + plaintext_arg, ] timeline_args = common_timeline_args + timeline_and_bookmark_args @@ -426,7 +433,8 @@ def editor(value): "action": "store_true", "default": False, "help": "Only print mentions", - }) + }), + plaintext_arg, ], require_auth=True, ), @@ -464,6 +472,7 @@ def editor(value): (["status_id"], { "help": "Show thread for toot.", }), + plaintext_arg, ], require_auth=True, ), @@ -474,6 +483,7 @@ def editor(value): (["status_id"], { "help": "ID of the status to show.", }), + plaintext_arg, ], require_auth=True, ), diff --git a/toot/output.py b/toot/output.py index 9bf7d919..8f0c28ed 100644 --- a/toot/output.py +++ b/toot/output.py @@ -277,7 +277,7 @@ def print_search_results(results): print_out("Nothing found") -def print_status(status: Status, width: int = 80): +def print_status(status: Status, width=80, render_mode=""): status_id = status.id in_reply_to_id = status.in_reply_to_id reblogged_by = status.account if status.reblog else None @@ -299,7 +299,7 @@ def print_status(status: Status, width: int = 80): f"{time}", ) - print_html(status.content, width) + print_html(status.content, width, render_mode=render_mode) if status.media_attachments: print_out("\nMedia:") @@ -320,8 +320,8 @@ def print_status(status: Status, 
width: int = 80): ) -def print_html(text, width=80): - markdown = "\n".join(html_to_text(text, columns=width, highlight_tags=False)) +def print_html(text, width=80, render_mode=""): + markdown = "\n".join(html_to_text(text, columns=width, render_mode=render_mode, highlight_tags=False)) print_out("") print_out(markdown) @@ -352,10 +352,10 @@ def print_poll(poll: Poll): print_out(poll_footer) -def print_timeline(items: List[Status], width=100): +def print_timeline(items: List[Status], width=100, render_mode=""): print_out("─" * width) for item in items: - print_status(item, width) + print_status(item, width, render_mode=render_mode) print_out("─" * width) @@ -367,7 +367,7 @@ def print_timeline(items: List[Status], width=100): } -def print_notification(notification: Notification, width=100): +def print_notification(notification: Notification, width=100, render_mode=""): account = f"{notification.account.display_name} @{notification.account.acct}" msg = notification_msgs.get(notification.type) if msg is None: @@ -376,10 +376,10 @@ def print_notification(notification: Notification, width=100): print_out("─" * width) print_out(msg.format(account=account)) if notification.status: - print_status(notification.status, width) + print_status(notification.status, width, render_mode=render_mode) -def print_notifications(notifications: List[Notification], width=100): +def print_notifications(notifications: List[Notification], render_mode="", width=100): for notification in notifications: - print_notification(notification) + print_notification(notification, render_mode=render_mode) print_out("─" * width) diff --git a/toot/richtext/__init__.py b/toot/richtext/__init__.py index 9888a5de..71bd8b58 100644 --- a/toot/richtext/__init__.py +++ b/toot/richtext/__init__.py @@ -1,6 +1,5 @@ -from toot.tui.utils import highlight_hashtags -from toot.utils import html_to_paragraphs -from toot.wcstring import wc_wrap +from toot.exceptions import ConsoleError +from toot.richtext.plaintext import 
html_to_plaintext from typing import List try: @@ -9,17 +8,8 @@ except ImportError: # Fallback to render in plaintext - def html_to_text(html: str, columns=80, highlight_tags=False) -> List: - output = [] - first = True - for paragraph in html_to_paragraphs(html): - if not first: - output.append("") - for line in paragraph: - for subline in wc_wrap(line, columns): - if highlight_tags: - output.append(highlight_hashtags(subline)) - else: - output.append(subline) - first = False - return output + def html_to_text(html: str, columns=80, render_mode: str = "", highlight_tags=False) -> List: + if render_mode == "markdown": + raise ConsoleError("Can't render as markdown because the pypandoc library is not available.") + + return html_to_plaintext(html, columns, highlight_tags) diff --git a/toot/richtext/markdown.py b/toot/richtext/markdown.py index a3ea03c1..a3184bb0 100644 --- a/toot/richtext/markdown.py +++ b/toot/richtext/markdown.py @@ -1,11 +1,17 @@ +from toot.exceptions import ConsoleError from pypandoc import convert_text +from toot.richtext.plaintext import html_to_plaintext from typing import List -def html_to_text(html: str, columns=80, highlight_tags=False) -> List: - return [convert_text( - html, - format="html", - to="gfm-raw_html", - extra_args=["--wrap=auto", f"--columns={columns}"], - )] +def html_to_text(html: str, columns=80, render_mode: str = "", highlight_tags=False) -> List: + if render_mode == "plaintext": + return html_to_plaintext(html, columns, highlight_tags) + elif render_mode == "markdown" or render_mode == "": + return [convert_text( + html, + format="html", + to="gfm-raw_html", + extra_args=["--wrap=auto", f"--columns={columns}"], + )] + raise ConsoleError("Unknown render mode; specify 'plaintext' or 'markdown'") diff --git a/toot/richtext/plaintext.py b/toot/richtext/plaintext.py new file mode 100644 index 00000000..d6285a1d --- /dev/null +++ b/toot/richtext/plaintext.py @@ -0,0 +1,20 @@ +from toot.utils import html_to_paragraphs +from 
toot.wcstring import wc_wrap +from toot.tui.utils import highlight_hashtags +from typing import List + + +def html_to_plaintext(html: str, columns=80, highlight_tags=False) -> List: + output = [] + first = True + for paragraph in html_to_paragraphs(html): + if not first: + output.append("") + for line in paragraph: + for subline in wc_wrap(line, columns): + if highlight_tags: + output.append(highlight_hashtags(subline)) + else: + output.append(subline) + first = False + return output From 32345e9a9f48e0e82b1e726905edbc29fa22731d Mon Sep 17 00:00:00 2001 From: Daniel Schwarz Date: Fri, 1 Dec 2023 17:08:51 -0500 Subject: [PATCH 10/10] Make plaintext the default for command line output Markdown is specified by using the -md or --markdown option --- tests/test_console.py | 8 ++++---- toot/commands.py | 8 ++++---- toot/console.py | 14 +++++++------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/test_console.py b/tests/test_console.py index eb004d92..1d321df0 100644 --- a/tests/test_console.py +++ b/tests/test_console.py @@ -132,7 +132,7 @@ def test_timeline(mock_get, monkeypatch, capsys): 'media_attachments': [], }]) - console.run_command(app, user, 'timeline', ['--once', '--plaintext']) + console.run_command(app, user, 'timeline', ['--once']) mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10}) @@ -175,7 +175,7 @@ def test_timeline_with_re(mock_get, monkeypatch, capsys): 'media_attachments': [], }]) - console.run_command(app, user, 'timeline', ['--once', '--plaintext']) + console.run_command(app, user, 'timeline', ['--once']) mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10}) @@ -237,7 +237,7 @@ def test_thread(mock_get, monkeypatch, capsys): }), ] - console.run_command(app, user, 'thread', ['--plaintext', '111111111111111111']) + console.run_command(app, user, 'thread', ['111111111111111111']) calls = [ mock.call(app, user, '/api/v1/statuses/111111111111111111'), @@ -553,7
+553,7 @@ def test_notifications(mock_get, capsys): }, }]) - console.run_command(app, user, 'notifications', ['--plaintext']) + console.run_command(app, user, 'notifications', []) mock_get.assert_called_once_with(app, user, '/api/v1/notifications', {'exclude_types[]': [], 'limit': 20}) diff --git a/toot/commands.py b/toot/commands.py index 3e7ee4d3..2d4be4fe 100644 --- a/toot/commands.py +++ b/toot/commands.py @@ -59,7 +59,7 @@ def timeline(app, user, args, generator=None): statuses = [from_dict(Status, item) for item in items] - print_timeline(statuses, render_mode="plaintext" if args.plaintext else "markdown") + print_timeline(statuses, render_mode="markdown" if args.markdown else "plaintext") if args.once or not sys.stdout.isatty(): break @@ -72,7 +72,7 @@ def timeline(app, user, args, generator=None): def status(app, user, args): status = api.single_status(app, user, args.status_id) status = from_dict(Status, status) - print_status(status, render_mode="plaintext" if args.plaintext else "markdown") + print_status(status, render_mode="markdown" if args.markdown else "plaintext") def thread(app, user, args): @@ -88,7 +88,7 @@ def thread(app, user, args): thread.append(item) statuses = [from_dict(Status, s) for s in thread] - print_timeline(statuses, render_mode="plaintext" if args.plaintext else "markdown") + print_timeline(statuses, render_mode="markdown" if args.markdown else "plaintext") def post(app, user, args): @@ -573,7 +573,7 @@ def notifications(app, user, args): notifications = reversed(notifications) notifications = [from_dict(Notification, n) for n in notifications] - print_notifications(notifications, render_mode="plaintext" if args.plaintext else "markdown") + print_notifications(notifications, render_mode="markdown" if args.markdown else "plaintext") def tui(app, user, args): diff --git a/toot/console.py b/toot/console.py index 86e08d88..6a597d60 100644 --- a/toot/console.py +++ b/toot/console.py @@ -235,12 +235,12 @@ def editor(value): json_arg = 
(["--json"], { "action": "store_true", "default": False, - "help": "print json instead of standard text", + "help": "print json instead of plaintext", }) -plaintext_arg = (["-pt", "--plaintext"], { +markdown_arg = (["-md", "--markdown"], { "action": "store_true", - "help": "Render status messages in plaintext, not markdown", + "help": "print status messages in markdown instead of plaintext", "default": False, }) @@ -290,7 +290,7 @@ def editor(value): "default": False, "help": "Only show the first toots, do not prompt to continue.", }), - plaintext_arg, + markdown_arg, ] timeline_args = common_timeline_args + timeline_and_bookmark_args @@ -434,7 +434,7 @@ def editor(value): "default": False, "help": "Only print mentions", }), - plaintext_arg, + markdown_arg, ], require_auth=True, ), @@ -472,7 +472,7 @@ def editor(value): (["status_id"], { "help": "Show thread for toot.", }), - plaintext_arg, + markdown_arg, ], require_auth=True, ), @@ -483,7 +483,7 @@ def editor(value): (["status_id"], { "help": "ID of the status to show.", }), - plaintext_arg, + markdown_arg, ], require_auth=True, ),