Skip to content

Commit

Permalink
Enable listings cache for HTTP filesystem
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Mar 6, 2021
1 parent 2d4045f commit 61c14eb
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 3 deletions.
29 changes: 26 additions & 3 deletions fsspec/implementations/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import re
import weakref
from copy import copy
from urllib.parse import urlparse

import aiohttp
Expand Down Expand Up @@ -80,7 +81,18 @@ def __init__(
self.cache_type = cache_type
self.cache_options = cache_options
self.client_kwargs = client_kwargs or {}
self.kwargs = storage_options

# Clean caching-related parameters from `storage_options`
# before propagating them as `request_options` through `self.kwargs`.
# TODO: Maybe rename `self.kwargs` to `self.request_options` to make
# it clearer.
request_options = copy(storage_options)
self.use_listings_cache = request_options.pop("use_listings_cache", False)
request_options.pop("listings_expiry_time", None)
request_options.pop("max_paths", None)
request_options.pop("skip_instance_cache", None)
self.kwargs = request_options

if not asynchronous:
self._session = sync(self.loop, get_client, **self.client_kwargs)
weakref.finalize(self, sync, self.loop, self.session.close)
Expand Down Expand Up @@ -109,7 +121,7 @@ def _parent(cls, path):
return par
return ""

async def _ls(self, url, detail=True, **kwargs):
async def _ls_real(self, url, detail=True, **kwargs):
# ignoring URL-encoded arguments
kw = self.kwargs.copy()
kw.update(kwargs)
Expand Down Expand Up @@ -142,7 +154,18 @@ async def _ls(self, url, detail=True, **kwargs):
# Ignore FTP-like "parent"
out.add("/".join([url.rstrip("/"), l.lstrip("/")]))
if not out and url.endswith("/"):
return await self._ls(url.rstrip("/"), detail=True)
return await self._ls_real(url.rstrip("/"), detail=True)

return out

async def _ls(self, url, detail=True, **kwargs):

if self.use_listings_cache and url in self.dircache:
out = self.dircache[url]
else:
out = await self._ls_real(url, detail=detail, **kwargs)
self.dircache[url] = out

if detail:
return [
{
Expand Down
30 changes: 30 additions & 0 deletions fsspec/implementations/tests/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,36 @@ def test_list(server):
assert out == [server + "/index/realfile"]


def test_list_invalid_args(server):
    """An unknown keyword option must surface as a ``TypeError``.

    Both creating the filesystem and globbing stay inside the
    ``pytest.raises`` block, so the error may come from either step.
    """
    with pytest.raises(TypeError):
        fsspec.filesystem("http", use_foobar=True).glob(server + "/index/*")


def test_list_cache(server):
    """Globbing the index works with ``use_listings_cache=True``."""
    fs = fsspec.filesystem("http", use_listings_cache=True)
    listing = fs.glob(server + "/index/*")
    # The index page is expected to expose exactly one entry.
    expected = [server + "/index/realfile"]
    assert listing == expected


def test_list_cache_with_expiry_time(server):
    """Globbing works when ``listings_expiry_time`` is also supplied."""
    fs = fsspec.filesystem(
        "http",
        use_listings_cache=True,
        listings_expiry_time=30,
    )
    listing = fs.glob(server + "/index/*")
    # The extra cache option must not change the listing result.
    expected = [server + "/index/realfile"]
    assert listing == expected


def test_list_cache_with_max_paths(server):
    """Globbing works when ``max_paths`` is also supplied."""
    fs = fsspec.filesystem(
        "http",
        use_listings_cache=True,
        max_paths=5,
    )
    listing = fs.glob(server + "/index/*")
    # The extra cache option must not change the listing result.
    expected = [server + "/index/realfile"]
    assert listing == expected


def test_list_cache_with_skip_instance_cache(server):
    """Globbing works when ``skip_instance_cache`` is also supplied."""
    fs = fsspec.filesystem(
        "http",
        use_listings_cache=True,
        skip_instance_cache=True,
    )
    listing = fs.glob(server + "/index/*")
    # The extra option must not change the listing result.
    expected = [server + "/index/realfile"]
    assert listing == expected


def test_isdir(server):
h = fsspec.filesystem("http")
assert h.isdir(server + "/index/")
Expand Down

0 comments on commit 61c14eb

Please sign in to comment.