From 8d4a569291525277f23f011b43f2e0fe54233d74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Wed, 4 Sep 2024 02:02:04 +0100 Subject: [PATCH 1/7] Fix fetching lyrics from lrclib Adjust the base URL to perform a '/search' instead of attempting to '/get' specific lyrics where we're unlikely to find lyrics for the specific combination of album, artist, track names and the duration (see https://lrclib.net/docs). Since we receive an array of matching lyrics candidates, rank them by their duration similarity to the item's duration, and whether they contain synced lyrics. --- beetsplug/lyrics.py | 59 +++++++++++++++++++++++++++++--- docs/changelog.rst | 4 +++ test/plugins/test_lyrics.py | 68 +++++++++++++++++++++++-------------- 3 files changed, 100 insertions(+), 31 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index c00194f091..736b71ea55 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -30,6 +30,7 @@ from urllib.parse import quote, urlencode import requests +from typing_extensions import TypedDict from unidecode import unidecode import beets @@ -266,12 +267,55 @@ def fetch( raise NotImplementedError +class LRCLibItem(TypedDict): + """Lyrics data item returned by the LRCLib API.""" + + id: int + name: str + trackName: str + artistName: str + albumName: str + duration: float + instrumental: bool + plainLyrics: str + syncedLyrics: str | None + + class LRCLib(Backend): - base_url = "https://lrclib.net/api/get" + base_url = "https://lrclib.net/api/search" + + @staticmethod + def get_rank( + target_duration: float, item: LRCLibItem + ) -> tuple[float, bool]: + """Rank the given lyrics item. + + Return a tuple with the following values: + 1. Absolute difference between lyrics and target duration + 2. Boolean telling whether synced lyrics are available. + """ + return ( + abs(item["duration"] - target_duration), + not item["syncedLyrics"], + ) + + @classmethod + def pick_lyrics( + cls, target_duration: float, data: list[LRCLibItem] + ) -> LRCLibItem: + """Return best matching lyrics item from the given list. + + Best lyrics match is the one that has the closest duration to + ``target_duration`` and has synced lyrics available. + + Note that the incoming list is guaranteed to be non-empty. + """ + return min(data, key=lambda item: cls.get_rank(target_duration, item)) def fetch( self, artist: str, title: str, album: str, length: int ) -> str | None: + """Fetch lyrics for the given artist, title, and album.""" params: dict[str, str | int] = { "artist_name": artist, "track_name": title, @@ -288,15 +332,20 @@ def fetch( params=params, timeout=10, ) - data = response.json() + data: list[LRCLibItem] = response.json() except (requests.RequestException, json.decoder.JSONDecodeError) as exc: self._log.debug("LRCLib API request failed: {0}", exc) return None - if self.config["synced"]: - return data.get("syncedLyrics") or data.get("plainLyrics") + if not data: + return None + + item = self.pick_lyrics(length, data) + + if self.config["synced"] and (synced_lyrics := item["syncedLyrics"]): + return synced_lyrics - return data.get("plainLyrics") + return item["plainLyrics"] class DirectBackend(Backend): diff --git a/docs/changelog.rst b/docs/changelog.rst index fef29f5461..d3b39184cc 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -48,6 +48,10 @@ Bug fixes: * :doc:`plugins/lyrics`: Do not attempt to search for lyrics if either the artist or title is missing and ignore ``artist_sort`` value if it is empty. :bug:`2635` +* :doc:`plugins/lyrics`: Fix fetching lyrics from ``lrclib`` source. Instead of + attempting to fetch lyrics for a specific album, artist, title and duration + combination, the plugin now performs a search which yields many results. + :bug:`5102` For packagers: diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index 99e6f8a4e8..dcffeede33 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -228,7 +228,7 @@ def _patch_google_search(self, requests_mock, lyrics_page): def test_backend_source(self, lyrics_plugin, lyrics_page: LyricsPage): """Test parsed lyrics from each of the configured lyrics pages.""" lyrics = lyrics_plugin.get_lyrics( - lyrics_page.artist, lyrics_page.track_title, "", 0 + lyrics_page.artist, lyrics_page.track_title, "", 186 ) assert lyrics @@ -340,32 +340,35 @@ def test_scrape(self, backend, lyrics_html, expecting_lyrics): assert bool(backend.extract_lyrics(lyrics_html)) == expecting_lyrics +LYRICS_DURATION = 950 + + +def lyrics_match(**overrides): + return { + "duration": LYRICS_DURATION, + "syncedLyrics": "synced", + "plainLyrics": "plain", + **overrides, + } + + class TestLRCLibLyrics(LyricsBackendTest): + ITEM_DURATION = 999 + @pytest.fixture(scope="class") def backend_name(self): return "lrclib" @pytest.fixture def fetch_lyrics(self, backend, requests_mock, response_data): - requests_mock.get(lyrics.LRCLib.base_url, json=response_data) + requests_mock.get(backend.base_url, json=response_data) - return partial(backend.fetch, "la", "la", "la", 0) + return partial(backend.fetch, "la", "la", "la", self.ITEM_DURATION) - @pytest.mark.parametrize( - "response_data", - [ - { - "syncedLyrics": "[00:00.00] la la la", - "plainLyrics": "la la la", - } - ], - ) + @pytest.mark.parametrize("response_data", [[lyrics_match()]]) @pytest.mark.parametrize( "plugin_config, expected_lyrics", - [ - ({"synced": True}, "[00:00.00] la la la"), - ({"synced": False}, "la la la"), - ], + [({"synced": True}, "synced"), ({"synced": False}, "plain")], ) def test_synced_config_option(self, fetch_lyrics, expected_lyrics): assert fetch_lyrics() == expected_lyrics @@ -373,21 +376,34 @@ def test_synced_config_option(self, fetch_lyrics, expected_lyrics): @pytest.mark.parametrize( "response_data, expected_lyrics", [ + pytest.param([], None, id="handle non-matching lyrics"), pytest.param( - {"syncedLyrics": "", "plainLyrics": "la la la"}, - "la la la", - id="pick plain lyrics", + [lyrics_match()], "synced", id="synced when available" ), pytest.param( - { - "statusCode": 404, - "error": "Not Found", - "message": "Failed to find specified track", - }, - None, - id="not found", + [lyrics_match(syncedLyrics=None)], + "plain", + id="plain by default", + ), + pytest.param( + [ + lyrics_match( + duration=ITEM_DURATION, + syncedLyrics=None, + plainLyrics="plain with closer duration", + ), + lyrics_match(syncedLyrics="synced", plainLyrics="plain 2"), + ], + "plain with closer duration", + id="prefer closer duration", + ), + pytest.param( + [lyrics_match(syncedLyrics=None), lyrics_match()], + "synced", + id="prefer match with synced lyrics", ), ], ) + @pytest.mark.parametrize("plugin_config", [{"synced": True}]) def test_fetch_lyrics(self, fetch_lyrics, expected_lyrics): assert fetch_lyrics() == expected_lyrics From a398fbe62df648526dfeb038692470bf2a233e59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Tue, 27 Aug 2024 18:14:38 +0100 Subject: [PATCH 2/7] LRCLib: Improve exception handling --- beetsplug/lyrics.py | 41 ++++++++++++++++++++++--------------- test/plugins/test_lyrics.py | 26 +++++++++++++++++++++-- 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 736b71ea55..23cf9c68c2 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -284,6 +284,19 @@ class LRCLibItem(TypedDict): class LRCLib(Backend): base_url = "https://lrclib.net/api/search" + def warn(self, message: str, *args) -> None: + """Log a warning message with the class name.""" + self._log.warning(f"{self.__class__.__name__}: {message}", *args) + + def fetch_json(self, *args, **kwargs): + """Wrap the request method to raise an exception on HTTP errors.""" + kwargs.setdefault("timeout", 10) + kwargs.setdefault("headers", {"User-Agent": USER_AGENT}) + r = requests.get(*args, **kwargs) + r.raise_for_status() + + return r.json() + @staticmethod def get_rank( target_duration: float, item: LRCLibItem @@ -327,25 +340,21 @@ def fetch( params["duration"] = length try: - response = requests.get( - self.base_url, - params=params, - timeout=10, - ) - data: list[LRCLibItem] = response.json() - except (requests.RequestException, json.decoder.JSONDecodeError) as exc: - self._log.debug("LRCLib API request failed: {0}", exc) - return None - - if not data: - return None + data = self.fetch_json(self.base_url, params=params) + except requests.JSONDecodeError: + self.warn("Could not decode response JSON data") + except requests.RequestException as exc: + self.warn("Request error: {}", exc) + else: + if data: + item = self.pick_lyrics(length, data) - item = self.pick_lyrics(length, data) + if self.config["synced"] and (synced := item["syncedLyrics"]): + return synced - if self.config["synced"] and (synced_lyrics := item["syncedLyrics"]): - return synced_lyrics + return item["plainLyrics"] - return item["plainLyrics"] + return None class DirectBackend(Backend): diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index dcffeede33..dceec79b58 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -14,7 +14,9 @@ """Tests for the 'lyrics' plugin.""" +import re from functools import partial +from http import HTTPStatus import pytest @@ -360,8 +362,12 @@ def backend_name(self): return "lrclib" @pytest.fixture - def fetch_lyrics(self, backend, requests_mock, response_data): - requests_mock.get(backend.base_url, json=response_data) + def request_kwargs(self, response_data): + return {"json": response_data} + + @pytest.fixture + def fetch_lyrics(self, backend, requests_mock, request_kwargs): + requests_mock.get(backend.base_url, **request_kwargs) return partial(backend.fetch, "la", "la", "la", self.ITEM_DURATION) @@ -407,3 +413,19 @@ def test_synced_config_option(self, fetch_lyrics, expected_lyrics): @pytest.mark.parametrize("plugin_config", [{"synced": True}]) def test_fetch_lyrics(self, fetch_lyrics, expected_lyrics): assert fetch_lyrics() == expected_lyrics + + @pytest.mark.parametrize( + "request_kwargs, expected_log_match", + [ + ( + {"status_code": HTTPStatus.BAD_GATEWAY}, + r"LRCLib: Request error: 502", + ), + ({"text": "invalid"}, r"LRCLib: Could not decode.*JSON"), + ], + ) + def test_error(self, caplog, fetch_lyrics, expected_log_match): + assert fetch_lyrics() is None + assert caplog.messages + assert (last_log := caplog.messages[-1]) + assert re.search(expected_log_match, last_log, re.I) From 30379bca389fc42f9801d770a31cba6a7c38b22c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Tue, 27 Aug 2024 11:22:34 +0100 Subject: [PATCH 3/7] Update lyrics.sources configuration to prioritize lrclib --- beetsplug/lyrics.py | 2 +- docs/changelog.rst | 2 ++ docs/plugins/lyrics.rst | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 23cf9c68c2..eedf1e0e25 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -783,7 +783,7 @@ def fetch(self, artist: str, title: str, *_) -> str | None: class LyricsPlugin(plugins.BeetsPlugin): - SOURCES = ["google", "musixmatch", "genius", "tekstowo", "lrclib"] + SOURCES = ["lrclib", "google", "musixmatch", "genius", "tekstowo"] SOURCE_BACKENDS = { "google": Google, "musixmatch": MusiXmatch, diff --git a/docs/changelog.rst b/docs/changelog.rst index d3b39184cc..d1240e9ec2 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -51,6 +51,8 @@ Bug fixes: * :doc:`plugins/lyrics`: Fix fetching lyrics from ``lrclib`` source. Instead of attempting to fetch lyrics for a specific album, artist, title and duration combination, the plugin now performs a search which yields many results. + Update the default ``sources`` configuration to prioritize ``lrclib`` over + other sources since it returns reliable results quicker than others. :bug:`5102` For packagers: diff --git a/docs/plugins/lyrics.rst b/docs/plugins/lyrics.rst index 4939476938..d1f434d70f 100644 --- a/docs/plugins/lyrics.rst +++ b/docs/plugins/lyrics.rst @@ -56,7 +56,7 @@ configuration file. The available options are: sources known to be scrapeable. - **sources**: List of sources to search for lyrics. An asterisk ``*`` expands to all available sources. - Default: ``google genius tekstowo lrclib``, i.e., all the available sources. The + Default: ``lrclib google genius tekstowo``, i.e., all the available sources. The ``google`` source will be automatically deactivated if no ``google_API_key`` is setup. The ``google``, ``genius``, and ``tekstowo`` sources will only be enabled if From 2fb72c65a5e89c767e62182bee772c464cc9f2da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Fri, 27 Sep 2024 23:00:01 +0100 Subject: [PATCH 4/7] lyrics/LRCLib: handle instrumental lyrics --- beetsplug/lyrics.py | 6 +++++- test/plugins/test_lyrics.py | 6 ++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index eedf1e0e25..a938f53184 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -60,6 +60,7 @@ TAG_RE = re.compile(r"<[^>]*>") BREAK_RE = re.compile(r"\n?\s*]*)*>\s*\n?", re.I) USER_AGENT = f"beets/{beets.__version__}" +INSTRUMENTAL_LYRICS = "[Instrumental]" # The content for the base index.rst generated in ReST mode. REST_INDEX_TEMPLATE = """Lyrics @@ -349,6 +350,9 @@ def fetch( if data: item = self.pick_lyrics(length, data) + if item["instrumental"]: + return INSTRUMENTAL_LYRICS + if self.config["synced"] and (synced := item["syncedLyrics"]): return synced @@ -536,7 +540,7 @@ def _try_extracting_lyrics_from_non_data_lyrics_container(self, soup): string="This song is an instrumental", ): self._log.debug("Detected instrumental") - return "[Instrumental]" + return INSTRUMENTAL_LYRICS else: self._log.debug("Couldn't scrape page using known layouts") return None diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index dceec79b58..96ecfbba86 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -347,6 +347,7 @@ def test_scrape(self, backend, lyrics_html, expecting_lyrics): def lyrics_match(**overrides): return { + "instrumental": False, "duration": LYRICS_DURATION, "syncedLyrics": "synced", "plainLyrics": "plain", @@ -386,6 +387,11 @@ def test_synced_config_option(self, fetch_lyrics, expected_lyrics): pytest.param( [lyrics_match()], "synced", id="synced when available" ), + pytest.param( + [lyrics_match(instrumental=True)], + "[Instrumental]", + id="instrumental track", + ), pytest.param( [lyrics_match(syncedLyrics=None)], "plain", From 618c3a21a659a8b7bf36dda75c7628133e1bb3e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sat, 19 Oct 2024 00:50:32 +0100 Subject: [PATCH 5/7] Try to GET LRCLib lyrics before searching --- beetsplug/lyrics.py | 154 +++++++++++++++++++++++++----------- docs/changelog.rst | 11 +-- test/plugins/test_lyrics.py | 12 ++- 3 files changed, 122 insertions(+), 55 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index a938f53184..764c8b8579 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -25,8 +25,11 @@ import struct import unicodedata import warnings -from functools import partial -from typing import TYPE_CHECKING, ClassVar +from contextlib import suppress +from dataclasses import dataclass +from functools import cached_property, partial, total_ordering +from http import HTTPStatus +from typing import TYPE_CHECKING, ClassVar, Iterable, Iterator from urllib.parse import quote, urlencode import requests @@ -98,6 +101,10 @@ """ +class NotFoundError(requests.exceptions.HTTPError): + pass + + # Utilities. @@ -276,14 +283,80 @@ class LRCLibItem(TypedDict): trackName: str artistName: str albumName: str - duration: float + duration: float | None instrumental: bool plainLyrics: str syncedLyrics: str | None +@dataclass +@total_ordering +class LRCLyrics: + #: Percentage tolerance for max duration difference between lyrics and item. + DURATION_DIFF_TOLERANCE = 0.05 + + target_duration: float + duration: float + instrumental: bool + plain: str + synced: str | None + + def __le__(self, other: LRCLyrics) -> bool: + """Compare two lyrics items by their score.""" + return self.dist < other.dist + + @classmethod + def make(cls, candidate: LRCLibItem, target_duration: float) -> LRCLyrics: + return cls( + target_duration, + candidate["duration"] or 0.0, + candidate["instrumental"], + candidate["plainLyrics"], + candidate["syncedLyrics"], + ) + + @cached_property + def duration_dist(self) -> float: + """Return the absolute difference between lyrics and target duration.""" + return abs(self.duration - self.target_duration) + + @cached_property + def is_valid(self) -> bool: + """Return whether the lyrics item is valid. + Lyrics duration must be within the tolerance defined by + :attr:`DURATION_DIFF_TOLERANCE`. + """ + return ( + self.duration_dist + <= self.target_duration * self.DURATION_DIFF_TOLERANCE + ) + + @cached_property + def dist(self) -> tuple[float, bool]: + """Distance/score of the given lyrics item. + + Return a tuple with the following values: + 1. Absolute difference between lyrics and target duration + 2. Boolean telling whether synced lyrics are available. + + Best lyrics match is the one that has the closest duration to + ``target_duration`` and has synced lyrics available. + """ + return self.duration_dist, not self.synced + + def get_text(self, want_synced: bool) -> str: + if self.instrumental: + return INSTRUMENTAL_LYRICS + + return self.synced if want_synced and self.synced else self.plain + + class LRCLib(Backend): - base_url = "https://lrclib.net/api/search" + """Fetch lyrics from the LRCLib API.""" + + BASE_URL = "https://lrclib.net/api" + GET_URL = f"{BASE_URL}/get" + SEARCH_URL = f"{BASE_URL}/search" def warn(self, message: str, *args) -> None: """Log a warning message with the class name.""" @@ -294,69 +367,54 @@ def fetch_json(self, *args, **kwargs): kwargs.setdefault("timeout", 10) kwargs.setdefault("headers", {"User-Agent": USER_AGENT}) r = requests.get(*args, **kwargs) + if r.status_code == HTTPStatus.NOT_FOUND: + raise NotFoundError("HTTP Error: Not Found", response=r) r.raise_for_status() return r.json() - @staticmethod - def get_rank( - target_duration: float, item: LRCLibItem - ) -> tuple[float, bool]: - """Rank the given lyrics item. + def fetch_candidates( + self, artist: str, title: str, album: str, length: int + ) -> Iterator[list[LRCLibItem]]: + """Yield lyrics candidates for the given song data. - Return a tuple with the following values: - 1. Absolute difference between lyrics and target duration - 2. Boolean telling whether synced lyrics are available. + Firstly, attempt to GET lyrics directly, and then search the API if + lyrics are not found or the duration does not match. + + Return an iterator over lists of candidates. """ - return ( - abs(item["duration"] - target_duration), - not item["syncedLyrics"], - ) + base_params = {"artist_name": artist, "track_name": title} + get_params = {**base_params, "duration": length} + if album: + get_params["album_name"] = album - @classmethod - def pick_lyrics( - cls, target_duration: float, data: list[LRCLibItem] - ) -> LRCLibItem: - """Return best matching lyrics item from the given list. + with suppress(NotFoundError): + yield [self.fetch_json(self.GET_URL, params=get_params)] - Best lyrics match is the one that has the closest duration to - ``target_duration`` and has synced lyrics available. + yield self.fetch_json(self.SEARCH_URL, params=base_params) - Note that the incoming list is guaranteed to be non-empty. - """ - return min(data, key=lambda item: cls.get_rank(target_duration, item)) + @classmethod + def pick_best_match(cls, lyrics: Iterable[LRCLyrics]) -> LRCLyrics | None: + """Return best matching lyrics item from the given list.""" + return min((li for li in lyrics if li.is_valid), default=None) def fetch( self, artist: str, title: str, album: str, length: int ) -> str | None: - """Fetch lyrics for the given artist, title, and album.""" - params: dict[str, str | int] = { - "artist_name": artist, - "track_name": title, - } - if album: - params["album_name"] = album - - if length: - params["duration"] = length + """Fetch lyrics text for the given song data.""" + evaluate_item = partial(LRCLyrics.make, target_duration=length) try: - data = self.fetch_json(self.base_url, params=params) + for group in self.fetch_candidates(artist, title, album, length): + candidates = [evaluate_item(item) for item in group] + if item := self.pick_best_match(candidates): + return item.get_text(self.config["synced"]) + except StopIteration: + pass except requests.JSONDecodeError: self.warn("Could not decode response JSON data") except requests.RequestException as exc: self.warn("Request error: {}", exc) - else: - if data: - item = self.pick_lyrics(length, data) - - if item["instrumental"]: - return INSTRUMENTAL_LYRICS - - if self.config["synced"] and (synced := item["syncedLyrics"]): - return synced - - return item["plainLyrics"] return None diff --git a/docs/changelog.rst b/docs/changelog.rst index d1240e9ec2..6ce13a8144 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -48,11 +48,12 @@ Bug fixes: * :doc:`plugins/lyrics`: Do not attempt to search for lyrics if either the artist or title is missing and ignore ``artist_sort`` value if it is empty. :bug:`2635` -* :doc:`plugins/lyrics`: Fix fetching lyrics from ``lrclib`` source. Instead of - attempting to fetch lyrics for a specific album, artist, title and duration - combination, the plugin now performs a search which yields many results. - Update the default ``sources`` configuration to prioritize ``lrclib`` over - other sources since it returns reliable results quicker than others. +* :doc:`plugins/lyrics`: Fix fetching lyrics from ``lrclib`` source. If we + cannot find lyrics for a specific album, artist, title combination, the + plugin now tries to search for the artist and title and picks the most + relevant result. Update the default ``sources`` configuration to prioritize + ``lrclib`` over other sources since it returns reliable results quicker than + others. :bug:`5102` For packagers: diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index 96ecfbba86..0dee427ec3 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -368,7 +368,8 @@ def request_kwargs(self, response_data): @pytest.fixture def fetch_lyrics(self, backend, requests_mock, request_kwargs): - requests_mock.get(backend.base_url, **request_kwargs) + requests_mock.get(backend.GET_URL, status_code=HTTPStatus.NOT_FOUND) + requests_mock.get(backend.SEARCH_URL, **request_kwargs) return partial(backend.fetch, "la", "la", "la", self.ITEM_DURATION) @@ -385,7 +386,14 @@ def test_synced_config_option(self, fetch_lyrics, expected_lyrics): [ pytest.param([], None, id="handle non-matching lyrics"), pytest.param( - [lyrics_match()], "synced", id="synced when available" + [lyrics_match()], + "synced", + id="synced when available", + ), + pytest.param( + [lyrics_match(duration=1)], + None, + id="none: duration too short", ), pytest.param( [lyrics_match(instrumental=True)], From 33aafdd50b3fb769ee38e7397661f62b79c3c36b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Wed, 23 Oct 2024 15:37:41 +0100 Subject: [PATCH 6/7] Remove trailing spaces in synced lyrics lines without text --- beetsplug/lyrics.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 764c8b8579..159518281a 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -348,7 +348,10 @@ def get_text(self, want_synced: bool) -> str: if self.instrumental: return INSTRUMENTAL_LYRICS - return self.synced if want_synced and self.synced else self.plain + if want_synced and self.synced: + return "\n".join(map(str.strip, self.synced.splitlines())) + + return self.plain class LRCLib(Backend): From bb5f3e05938a60747b9cbe9511ec83f0bb9a1fc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 15 Dec 2024 00:17:07 +0000 Subject: [PATCH 7/7] lyrics: sort lrclib lyrics by synced field and query search first I found that the `/get` endpoint often returns incorrect or unsynced lyrics, while results returned by the `/search` more accurate options. Thus I reversed the change in the previous commit to prioritize searching first. --- beetsplug/lyrics.py | 15 +++++++++------ test/plugins/test_lyrics.py | 19 +++++++++++++++++-- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 159518281a..d1d715ce46 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -332,7 +332,7 @@ def is_valid(self) -> bool: ) @cached_property - def dist(self) -> tuple[float, bool]: + def dist(self) -> tuple[bool, float]: """Distance/score of the given lyrics item. Return a tuple with the following values: @@ -342,7 +342,7 @@ def dist(self) -> tuple[float, bool]: Best lyrics match is the one that has the closest duration to ``target_duration`` and has synced lyrics available. """ - return self.duration_dist, not self.synced + return not self.synced, self.duration_dist def get_text(self, want_synced: bool) -> str: if self.instrumental: @@ -381,8 +381,11 @@ def fetch_candidates( ) -> Iterator[list[LRCLibItem]]: """Yield lyrics candidates for the given song data. - Firstly, attempt to GET lyrics directly, and then search the API if - lyrics are not found or the duration does not match. + I found that the ``/get`` endpoint sometimes returns inaccurate or + unsynced lyrics, while ``search`` yields more suitable candidates. + Therefore, we prioritize the latter and rank the results using our own + algorithm. If the search does not give suitable lyrics, we fall back to + the ``/get`` endpoint. Return an iterator over lists of candidates. """ @@ -391,11 +394,11 @@ def fetch_candidates( if album: get_params["album_name"] = album + yield self.fetch_json(self.SEARCH_URL, params=base_params) + with suppress(NotFoundError): yield [self.fetch_json(self.GET_URL, params=get_params)] - yield self.fetch_json(self.SEARCH_URL, params=base_params) - @classmethod def pick_best_match(cls, lyrics: Iterable[LRCLyrics]) -> LRCLyrics | None: """Return best matching lyrics item from the given list.""" diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index 0dee427ec3..47c9837701 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -414,8 +414,23 @@ def test_synced_config_option(self, fetch_lyrics, expected_lyrics): ), lyrics_match(syncedLyrics="synced", plainLyrics="plain 2"), ], - "plain with closer duration", - id="prefer closer duration", + "synced", + id="prefer synced lyrics even if plain duration is closer", + ), + pytest.param( + [ + lyrics_match( + duration=ITEM_DURATION, + syncedLyrics=None, + plainLyrics="valid plain", + ), + lyrics_match( + duration=1, + syncedLyrics="synced with invalid duration", + ), + ], + "valid plain", + id="ignore synced with invalid duration", ), pytest.param( [lyrics_match(syncedLyrics=None), lyrics_match()],