From 78249af38a6227f373469880749251bc705c4daa Mon Sep 17 00:00:00 2001 From: Bo Wang Date: Mon, 11 Nov 2024 19:22:34 +0100 Subject: [PATCH] Parse the query parameter of url without using infer_storage_options (#912) --- s3fs/core.py | 7 ++++--- s3fs/tests/test_s3fs.py | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/s3fs/core.py b/s3fs/core.py index 9b9a3f57..a4a3691f 100644 --- a/s3fs/core.py +++ b/s3fs/core.py @@ -14,7 +14,7 @@ import fsspec # noqa: F401 from fsspec.spec import AbstractBufferedFile -from fsspec.utils import infer_storage_options, tokenize, setup_logging as setup_logger +from fsspec.utils import tokenize, setup_logging as setup_logger from fsspec.asyn import ( AsyncFileSystem, AbstractAsyncStreamedFile, @@ -391,8 +391,9 @@ def _get_kwargs_from_urls(urlpath): Assume that we want to use version_aware mode for the filesystem. """ - url_storage_opts = infer_storage_options(urlpath) - url_query = url_storage_opts.get("url_query") + from urllib.parse import urlsplit + + url_query = urlsplit(urlpath).query out = {} if url_query is not None: from urllib.parse import parse_qs diff --git a/s3fs/tests/test_s3fs.py b/s3fs/tests/test_s3fs.py index b64907d1..612db73f 100644 --- a/s3fs/tests/test_s3fs.py +++ b/s3fs/tests/test_s3fs.py @@ -79,6 +79,7 @@ def s3_base(): os.environ["AWS_SECRET_ACCESS_KEY"] = "foo" if "AWS_ACCESS_KEY_ID" not in os.environ: os.environ["AWS_ACCESS_KEY_ID"] = "foo" + os.environ.pop("AWS_PROFILE", None) print("server up") yield @@ -2162,6 +2163,42 @@ def test_via_fsspec(s3): assert f.read() == b"hello" +@pytest.mark.parametrize( + ["raw_url", "expected_url", "expected_version_aware"], + [ + ( + "s3://arn:aws:s3:us-west-2:123456789012:accesspoint/abc/123.jpg", + "arn:aws:s3:us-west-2:123456789012:accesspoint/abc/123.jpg", + False, + ), + ( + "s3://arn:aws:s3:us-west-2:123456789012:accesspoint/abc/123.jpg?versionId=some_version_id", + "arn:aws:s3:us-west-2:123456789012:accesspoint/abc/123.jpg?versionId=some_version_id", + True, + ), + ( + "s3://xyz/abc/123.jpg", + "xyz/abc/123.jpg", + False, + ), + ( + "s3://xyz/abc/123.jpg?versionId=some_version_id", + "xyz/abc/123.jpg?versionId=some_version_id", + True, + ), + ], +) +def test_fsspec_url_to_fs_compatability( + s3, raw_url, expected_url, expected_version_aware +): + import fsspec + + fs, url = fsspec.url_to_fs(raw_url) + assert isinstance(fs, type(s3)) + assert fs.version_aware is expected_version_aware + assert url == expected_url + + def test_repeat_exists(s3): fn = "s3://" + test_bucket_name + "/file1" s3.touch(fn)