Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add logic to verify URLs using HTML meta tag #16597

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements/lint.in
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ types-certifi
types-first
types-html5lib
types-itsdangerous
types-lxml
types-passlib
types-python-slugify
types-pytz
Expand Down
17 changes: 16 additions & 1 deletion requirements/lint.txt
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,10 @@ cryptography==42.0.8 \
# via
# types-pyopenssl
# types-redis
cssselect==1.2.0 \
--hash=sha256:666b19839cfaddb9ce9d36bfe4c969132c647b92fc9088c4e23f786b30f1b3dc \
--hash=sha256:da1885f0c10b60c03ed5eccbb6b68d6eff248d91976fcde348f395d54c9fd35e
# via types-lxml
curlylint==0.13.1 \
--hash=sha256:008b9d160f3920404ac12efb05c0a39e209cb972f9aafd956b79c5f4e2162752 \
--hash=sha256:9546ea82cdfc9292fd6fe49dca28587164bd315782a209c0a46e013d7f38d2fa
Expand Down Expand Up @@ -430,6 +434,10 @@ types-babel==2.11.0.15 \
--hash=sha256:282c184c8c9d81e8269212c1b8fa0d39ee88fb8bc43be47980412781c9c85f7e \
--hash=sha256:d0579f2e8adeaef3fbe2eb63e5a2ecf01767fc018e5f3f36a3c9d8b723bd62c7
# via -r requirements/lint.in
types-beautifulsoup4==4.12.0.20240511 \
--hash=sha256:004f6096fdd83b19cdbf6cb10e4eae57b10205eccc365d0a69d77da836012e28 \
--hash=sha256:7ceda66a93ba28d759d5046d7fec9f4cad2f563a77b3a789efc90bcadafeefd1
# via types-lxml
types-boto3==1.0.2 \
--hash=sha256:15f3ffad0314e40a0708fec25f94891414f93260202422bf8b19b6913853c983 \
--hash=sha256:a6a88e94d59d887839863a64095493956efc148e747206880a7eb47d90ae8398
Expand All @@ -449,11 +457,17 @@ types-first==2.0.5.20240806 \
types-html5lib==1.1.11.20240806 \
--hash=sha256:575c4fd84ba8eeeaa8520c7e4c7042b7791f5ec3e9c0a5d5c418124c42d9e7e4 \
--hash=sha256:8060dc98baf63d6796a765bbbc809fff9f7a383f6e3a9add526f814c086545ef
# via -r requirements/lint.in
# via
# -r requirements/lint.in
# types-beautifulsoup4
types-itsdangerous==1.1.6 \
--hash=sha256:21c6966c10e353a5d35d36c82aaa2c5598d3bc32ddc8e0591276da5ad2e3c638 \
--hash=sha256:aef2535c2fa0527dcce244ece0792b20ec02ee46533800735275f82a45a0244d
# via -r requirements/lint.in
types-lxml==2024.8.7 \
--hash=sha256:9ee5cdb1efd60f6eeb101b78f92591fd99202e4878b46d621b52f6cd67a9c80f \
--hash=sha256:a0b8669b2dc57d47dcf31fbbee5007f8ed71b37406f4c7e5fa650e2480568eb9
# via -r requirements/lint.in
types-passlib==1.7.7.20240819 \
--hash=sha256:8fc8df71623845032293d5cf7f8091f0adfeba02d387a2888684b8413f14b3d0 \
--hash=sha256:c4d299083497b66e12258c7b77c08952574213fdf7009da3135d8181a6a25f23
Expand Down Expand Up @@ -513,6 +527,7 @@ typing-extensions==4.12.2 \
# via
# celery-types
# mypy
# types-lxml
urllib3==2.2.2 \
--hash=sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472 \
--hash=sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168
Expand Down
278 changes: 273 additions & 5 deletions tests/unit/packaging/test_metadata_verification.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,28 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import socket

import pretend
import pytest
import rfc3986
import urllib3

from dns.inet import AF_INET

from warehouse.packaging import metadata_verification as mv

from warehouse.packaging.metadata_verification import _verify_url_pypi, verify_url
HTML_CONTENT = """
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8" /><meta name="viewport" content="width=device-width" />
{tag_in_head}
<title>Welcome to the example documentation! &mdash; Example documentation</title>
</head>
<body>
{tag_in_body}
"""


@pytest.mark.parametrize(
Expand Down Expand Up @@ -118,7 +136,257 @@
],
)
def test_verify_url_pypi(url, project_name, project_normalized_name, expected):
assert _verify_url_pypi(url, project_name, project_normalized_name) == expected
assert mv._verify_url_pypi(url, project_name, project_normalized_name) == expected


def test_get_url_content(monkeypatch):
    """_get_url_content returns the response body and closes the response."""
    target = rfc3986.api.uri_reference("https://example.com")

    fake_response = pretend.stub(
        read=lambda amt: "content",
        close=pretend.call_recorder(lambda: None),
    )
    fake_pool = pretend.stub(request=lambda *a, **kw: fake_response)
    monkeypatch.setattr(
        urllib3, "HTTPSConnectionPool", lambda *a, **kw: fake_pool
    )

    content = mv._get_url_content(
        resolved_ip="100.100.100.100", url=target, max_length_bytes=1024
    )

    assert content == "content"
    # The response must always be closed after the body is read.
    assert fake_response.close.calls == [pretend.call()]


def test_verify_url_meta_tag_urllib_raises(monkeypatch):
    """Verification fails (returns False) when the HTTP fetch raises.

    A urllib3 ``ProtocolError`` raised while connecting must be swallowed by
    ``_verify_url_meta_tag`` and reported as an unverified URL.
    """
    # Use socket.AF_INET like every other test in this module, instead of the
    # equivalent AF_INET constant re-exported by dns.inet — keeps the tests
    # consistent and drops the block's dependency on dnspython.
    monkeypatch.setattr(
        socket,
        "getaddrinfo",
        lambda *args: [(socket.AF_INET, None, None, None, ("1.1.1.1",))],
    )

    def pool_raises(*args, **kwargs):
        raise urllib3.exceptions.ProtocolError()

    monkeypatch.setattr(
        urllib3.HTTPSConnectionPool,
        "__init__",
        pool_raises,
    )

    assert not mv._verify_url_meta_tag("https://example.com", "package1", "package1")


def test_verify_url_meta_tag_getaddrinfo_empty(monkeypatch):
    """If DNS resolution yields no addresses, verification fails."""
    monkeypatch.setattr(socket, "getaddrinfo", lambda *args: [])

    assert not mv._verify_url_meta_tag("https://example.com", "package1", "package1")


def test_verify_url_meta_tag_url_validation(monkeypatch):
    """Only well-formed HTTPS URLs with a hostname (no port, no IP) verify."""
    page = HTML_CONTENT.format(
        tag_in_head='<meta content="package1" namespace="pypi.org" rel="me" />',
        tag_in_body="",
    )

    monkeypatch.setattr(
        socket,
        "getaddrinfo",
        lambda *args: [(socket.AF_INET, None, None, None, ("1.1.1.1",))],
    )
    monkeypatch.setattr(
        mv,
        "_get_url_content",
        lambda resolved_ip, url, max_length_bytes: page,
    )

    # A plain HTTPS URL with a hostname is accepted.
    assert mv._verify_url_meta_tag("https://example.com", "package1", "package1")

    rejected = [
        "invalid url",  # not parseable as a URL
        "http://nothttps.com",  # scheme must be https
        "https://portincluded.com:80",  # explicit port not allowed
        "https://portincluded.com:443",  # even the default https port
        "missinghttps.com",  # scheme missing entirely
        "https://1.1.1.1",  # IPv4 literal hosts are not allowed
        "https://2001:0db8:85a3:0000:0000:8a2e:0370:7334",  # nor IPv6 literals
    ]
    for bad_url in rejected:
        assert not mv._verify_url_meta_tag(bad_url, "package1", "package1")


@pytest.mark.parametrize(
    ("ip_address", "family", "expected"),
    [
        # Globally-routable addresses are acceptable.
        ("1.1.1.1", socket.AF_INET, True),
        ("2607:f8b0:4004:c08::8b", socket.AF_INET6, True),
        # Loopback / unspecified / private / shared / link-local must fail.
        ("127.0.0.1", socket.AF_INET, False),
        ("0.0.0.0", socket.AF_INET, False),
        ("192.168.2.1", socket.AF_INET, False),
        ("10.0.0.2", socket.AF_INET, False),
        ("172.16.2.3", socket.AF_INET, False),
        ("100.64.100.3", socket.AF_INET, False),
        ("169.254.0.2", socket.AF_INET, False),
        ("::1", socket.AF_INET6, False),
        ("fd12:3456:789a:1::1", socket.AF_INET6, False),
        ("fe80::ab8", socket.AF_INET6, False),
        # An address family that is neither IPv4 nor IPv6.
        ("2.0000-0c91-f61f", socket.AF_IPX, False),
        # Syntactically invalid IPv4 address.
        ("100.100.100.100.100", socket.AF_INET, False),
    ],
)
def test_verify_url_meta_tag_ip_validation(monkeypatch, ip_address, family, expected):
    """Verification proceeds only when the domain resolves to a global IP."""
    page = HTML_CONTENT.format(
        tag_in_head='<meta content="package1" namespace="pypi.org" rel="me" />',
        tag_in_body="",
    )

    monkeypatch.setattr(
        socket,
        "getaddrinfo",
        lambda *args: [(family, None, None, None, (ip_address,))],
    )
    monkeypatch.setattr(
        mv, "_get_url_content", lambda resolved_ip, url, max_length_bytes: page
    )

    result = mv._verify_url_meta_tag("https://example.com", "package1", "package1")
    assert result == expected


@pytest.mark.parametrize(
    ("project_name", "tag_in_head", "tag_in_body", "expected"),
    [
        # Tag in <head>, expected package named in the content attribute.
        (
            "package1",
            '<meta content="package1" namespace="pypi.org" rel="me" />',
            "",
            True,
        ),
        # Tag in <head>, expected package among several in content.
        (
            "package1",
            '<meta content="package1 package2 other" namespace="pypi.org" rel="me" />',
            "",
            True,
        ),
        # No meta tag at all.
        (
            "package1",
            "",
            "",
            False,
        ),
        # content names a different package.
        (
            "package1",
            '<meta content="package2" namespace="pypi.org" rel="me" />',
            "",
            False,
        ),
        # content attribute absent.
        (
            "package1",
            '<meta namespace="pypi.org" rel="me" />',
            "",
            False,
        ),
        # namespace attribute has the wrong value.
        (
            "package1",
            '<meta content="package1" namespace="notpypi.org" rel="me" />',
            "",
            False,
        ),
        # namespace attribute absent.
        (
            "package1",
            '<meta content="package1" rel="me" />',
            "",
            False,
        ),
        # rel attribute has the wrong value.
        (
            "package1",
            '<meta content="package1" namespace="pypi.org" rel="notme" />',
            "",
            False,
        ),
        # rel attribute absent.
        (
            "package1",
            '<meta content="package1" namespace="pypi.org" />',
            "",
            False,
        ),
        # Tag placed in <body> instead of <head> is ignored.
        (
            "package1",
            "",
            '<meta content="package1" namespace="pypi.org" rel="me" />',
            False,
        ),
    ],
)
def test_verify_url_meta_tag_content_parsing(
    monkeypatch, project_name, tag_in_head, tag_in_body, expected
):
    """The meta tag verifies only when well-formed and located in <head>."""
    html = HTML_CONTENT.format(tag_in_head=tag_in_head, tag_in_body=tag_in_body)

    monkeypatch.setattr(
        socket,
        "getaddrinfo",
        lambda *args: [(socket.AF_INET, None, None, None, ("1.1.1.1",))],
    )
    monkeypatch.setattr(
        mv, "_get_url_content", lambda resolved_ip, url, max_length_bytes: html
    )

    verified = mv._verify_url_meta_tag(
        url="https://example.com",
        project_name=project_name,
        project_normalized_name=project_name,
    )
    assert verified == expected


def test_verify_url_meta_tag_content_parsing_invalid_html(monkeypatch):
    """Unparseable HTML content never verifies."""
    monkeypatch.setattr(
        socket,
        "getaddrinfo",
        lambda *args: [(socket.AF_INET, None, None, None, ("1.1.1.1",))],
    )
    monkeypatch.setattr(
        mv, "_get_url_content", lambda resolved_ip, url, max_length_bytes: "<<<<<"
    )

    assert not mv._verify_url_meta_tag("https://example.com", "package1", "package1")


def test_verify_url():
Expand All @@ -127,21 +395,21 @@ def test_verify_url():
publisher_verifies = pretend.stub(verify_url=lambda url: True)
publisher_fails = pretend.stub(verify_url=lambda url: False)

assert verify_url(
assert mv.verify_url(
url="https://pypi.org/project/myproject/",
publisher=None,
project_name="myproject",
project_normalized_name="myproject",
)

assert verify_url(
assert mv.verify_url(
url="https://github.com/org/myproject/issues",
publisher=publisher_verifies,
project_name="myproject",
project_normalized_name="myproject",
)

assert not verify_url(
assert not mv.verify_url(
url="example.com",
publisher=publisher_fails,
project_name="myproject",
Expand Down
Loading