Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse the query parameter of url without using infer_storage_options #912

Merged
merged 2 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions s3fs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import fsspec # noqa: F401
from fsspec.spec import AbstractBufferedFile
from fsspec.utils import infer_storage_options, tokenize, setup_logging as setup_logger
from fsspec.utils import tokenize, setup_logging as setup_logger
from fsspec.asyn import (
AsyncFileSystem,
AbstractAsyncStreamedFile,
Expand Down Expand Up @@ -391,8 +391,9 @@ def _get_kwargs_from_urls(urlpath):
Assume that we want to use version_aware mode for
the filesystem.
"""
url_storage_opts = infer_storage_options(urlpath)
url_query = url_storage_opts.get("url_query")
from urllib.parse import urlsplit

url_query = urlsplit(urlpath).query
out = {}
if url_query is not None:
from urllib.parse import parse_qs
Expand Down
37 changes: 37 additions & 0 deletions s3fs/tests/test_s3fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def s3_base():
os.environ["AWS_SECRET_ACCESS_KEY"] = "foo"
if "AWS_ACCESS_KEY_ID" not in os.environ:
os.environ["AWS_ACCESS_KEY_ID"] = "foo"
os.environ.pop("AWS_PROFILE", None)

print("server up")
yield
Expand Down Expand Up @@ -2144,6 +2145,42 @@ def test_via_fsspec(s3):
assert f.read() == b"hello"


@pytest.mark.parametrize(
    ["raw_url", "expected_url", "expected_version_aware"],
    [
        # Access-point ARN as the bucket, no query string.
        (
            "s3://arn:aws:s3:us-west-2:123456789012:accesspoint/abc/123.jpg",
            "arn:aws:s3:us-west-2:123456789012:accesspoint/abc/123.jpg",
            False,
        ),
        # Access-point ARN as the bucket, with a versionId query.
        (
            "s3://arn:aws:s3:us-west-2:123456789012:accesspoint/abc/123.jpg?versionId=some_version_id",
            "arn:aws:s3:us-west-2:123456789012:accesspoint/abc/123.jpg?versionId=some_version_id",
            True,
        ),
        # Plain bucket name, no query string.
        (
            "s3://xyz/abc/123.jpg",
            "xyz/abc/123.jpg",
            False,
        ),
        # Plain bucket name, with a versionId query.
        (
            "s3://xyz/abc/123.jpg?versionId=some_version_id",
            "xyz/abc/123.jpg?versionId=some_version_id",
            True,
        ),
    ],
)
def test_fsspec_url_to_fs_compatability(
    s3, raw_url, expected_url, expected_version_aware
):
    """Check what ``fsspec.url_to_fs`` returns for ``s3://`` URLs.

    Each case passes ``raw_url`` to ``fsspec.url_to_fs`` and verifies that:

    * the returned filesystem has the same type as the ``s3`` fixture;
    * its ``version_aware`` flag is exactly ``expected_version_aware``;
    * the returned path equals ``expected_url``.

    The cases cover both an access-point ARN and a plain bucket name, each
    with and without a ``versionId`` query parameter.
    """
    from fsspec import url_to_fs

    filesystem, stripped_path = url_to_fs(raw_url)

    assert isinstance(filesystem, type(s3))
    # Identity comparison on purpose: the flag must be a real bool.
    assert filesystem.version_aware is expected_version_aware
    assert stripped_path == expected_url


def test_repeat_exists(s3):
fn = "s3://" + test_bucket_name + "/file1"
s3.touch(fn)
Expand Down
Loading