Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use npe2api instead of scraping PyPI for active plugins #1331

Merged
merged 1 commit into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 2 additions & 30 deletions napari-hub-commons/src/nhcommons/tests/utils/test_pypi_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,31 +137,9 @@ class TestPypiAdapter:
def setup_method(self, monkeypatch):
    """Replace ``requests.get`` with this class's ``_mocked_requests_get``."""
    # NOTE(review): receiving ``monkeypatch`` as an argument presumably relies on a
    # fixture decorator above this line, outside the visible hunk — confirm.
    monkeypatch.setattr(requests, "get", self._mocked_requests_get)

def _generate_html_data(self, plugin_version_list: List[Tuple[str, str]]):
    """
    Render one HTML snippet per entry and join the snippets with ``<br>``.

    :param plugin_version_list: entries whose item 0 is rendered in the name span
        and whose item 1 is rendered in the version span
    :returns: the joined HTML text
    """
    # The class attribute of the second span is taken from ``self._version_field``,
    # so the caller controls whether it reads "package-snippet__version".
    # The lines of the literal are kept flush-left exactly as they appear here so
    # the generated text is unchanged.
    data = [
        f"""
<div>
<span class="package-snippet__name">{plugin[0]}</span>
<span class="{self._version_field}">{plugin[1]}</span>
</div>
"""
        for plugin in plugin_version_list
    ]
    return "<br>".join(data)

def _mocked_requests_get(self, *args, **kwargs):
if args[0] == "https://pypi.org/search/":
params = kwargs.get("params", {})
page = params.get("page", 1000)
if (
params
and len(params) == 3
and params.get("o") == "-created"
and params.get("c") == "Framework :: napari"
and page < 3
):
data = plugins()[:2] if page == 1 else plugins()[2:]
return MockResponse(content=self._generate_html_data(data))
if args[0] == "https://api.napari.org/api/plugins":
return MockResponse(content=json.dumps({name: version for name, version in plugins()}))
elif args[0] == "https://pypi.org/pypi/napari-demo/json":
return MockResponse(content=valid_pypi_data())
elif args[0] == "https://pypi.org/pypi/default-demo/json":
Expand All @@ -171,15 +149,9 @@ def _mocked_requests_get(self, *args, **kwargs):
return MockResponse(status_code=requests.codes.not_found)

def test_get_all_plugins(self):
    """get_all_plugins() must return the name -> version mapping built from plugins()."""
    self._version_field = "package-snippet__version"
    wanted = {}
    for entry in plugins():
        # Item 0 is the plugin name, item 1 its version.
        wanted[entry[0]] = entry[1]
    assert wanted == pypi_adapter.get_all_plugins()

def test_get_all_plugins_invalid_response(self):
    """
    get_all_plugins() must raise ValueError when the version class is "foo".

    NOTE(review): this looks like it targets the HTML-scraping path, where a
    version span with an unexpected class leaves names and versions with
    different counts — confirm it still applies if plugins come from npe2api.
    """
    bogus_version_class = "foo"
    self._version_field = bogus_version_class
    with pytest.raises(ValueError):
        pypi_adapter.get_all_plugins()

@pytest.mark.parametrize(
"plugin, version, extra_fields, expected",
[
Expand Down
30 changes: 9 additions & 21 deletions napari-hub-commons/src/nhcommons/utils/pypi_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,36 +11,24 @@
# Patterns that capture the text of the name / version <span> elements in
# PyPI search-result HTML. Raw strings keep the patterns safe if escapes are
# ever added.
_NAME_PATTERN = re.compile(r'class="package-snippet__name">(.+)</span>')
_VERSION_PATTERN = re.compile(r'class="package-snippet__version">(.+)</span>')
_BASE_URL = "https://pypi.org"
# Plain literal: the original f-string had no placeholders.
_SEARCH_URL = "/search/"
# ``{plugin}`` is a placeholder, presumably filled in with str.format by callers.
_PLUGIN_DATA_URL = "/pypi/{plugin}/json"
# Base of the npe2api service; endpoint paths such as "/plugins" are appended.
_NPE2API_URL = "https://api.napari.org/api"

logger = logging.getLogger(__name__)


def get_all_plugins() -> Dict[str, str]:
    """
    Query npe2api to get all plugins.

    The plugin list comes from npe2api, which uses the public BigQuery PyPI
    metadata as its source of truth. The previous implementation scraped the
    PyPI search pages and was broken by anti-scraping changes to PyPI, so no
    request is made to the PyPI search endpoint any more.

    :returns: all plugin names and latest version
    """
    logger.info("Getting all napari plugins from npe2api")
    # The response body is returned as parsed; it is expected to be a JSON
    # object mapping plugin name to version.
    packages = get_request(_NPE2API_URL + "/plugins").json()
    logger.info(f"Total number of napari plugins fetched={len(packages)}")
    return packages

Expand Down