Skip to content

Commit

Permalink
Use fsspec for all types of file access using a monkey patch
Browse files Browse the repository at this point in the history
  • Loading branch information
gutzbenj committed Jan 28, 2022
1 parent 7bbe744 commit 3573ead
Show file tree
Hide file tree
Showing 21 changed files with 330 additions and 217 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:
uses: actions/cache@v2
env:
# Increase this value to reset cache if `poetry.lock` has not changed.
CACHE_NUMBER: 1
CACHE_NUMBER: 2
with:
path: ${{ steps.poetry-cache-dir.outputs.dir }}
key: poetry-${{ runner.os }}-py${{ matrix.python-version }}-${{ hashFiles('poetry.lock') }}-${{ env.CACHE_NUMBER }}
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Development
***********

- Fix access to ECCC stations listing using Google Drive storage
- Replace the previous caching entirely with fsspec-based caching (+monkeypatch)

0.24.0 (24.01.2022)
*******************
Expand Down
26 changes: 0 additions & 26 deletions THIRD_PARTY_NOTICES
Original file line number Diff line number Diff line change
Expand Up @@ -4315,32 +4315,6 @@ licenses_ directory.
.. _GPL-compatible: http://www.gnu.org/licenses/license-list.html


dogpile.cache
1.1.5
MIT License
Mike Bayer
https://github.com/sqlalchemy/dogpile.cache
Copyright 2005-2022 Michael Bayer.

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


duckdb
0.2.8
MIT License
Expand Down
3 changes: 2 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
# for how to get the version from the pyproject.toml
import os
import sys
import tomlkit

import sphinx_material
import tomlkit

sys.path.insert(0, os.path.abspath(".."))

Expand Down
9 changes: 8 additions & 1 deletion docs/img/readme_img.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,14 @@
from matplotlib.colors import ListedColormap
from matplotlib.patches import Rectangle

from wetterdienst.provider.dwd.observation import DwdObservationRequest, DwdObservationValues, DwdObservationParameter, DwdObservationDataset, DwdObservationResolution, DwdObservationPeriod
from wetterdienst.provider.dwd.observation import (
DwdObservationDataset,
DwdObservationParameter,
DwdObservationPeriod,
DwdObservationRequest,
DwdObservationResolution,
DwdObservationValues,
)

plt.style.use('ggplot')

Expand Down
34 changes: 15 additions & 19 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ beautifulsoup4 = "^4.9"
requests = "^2.20"
requests-ftp = "^0.3"
python-dateutil = "^2.8"
"dogpile.cache" = "^1.0"
appdirs = "^1.4"
lxml = "^4.5"
tqdm = "^4.47"
Expand Down Expand Up @@ -145,6 +144,7 @@ ipython-genutils = { version = "^0.2", optional = true }
zarr = { version = "^2.7", optional = true, markers = "sys_platform != 'darwin' or (sys_platform == 'darwin' and platform_machine != 'arm64')" } # not supported through numcodecs
xarray = { version = "^0.17", optional = true }
timezonefinder = "^5.2"
diskcache = "^5.4.0"


[tool.poetry.dev-dependencies]
Expand Down Expand Up @@ -258,8 +258,12 @@ flake8-print = ["+*"]
flake8-return = ["+*"]
flake8-2020 = ["+*"]

[tool.flakeheaven.exceptions."wetterdienst/__init__.py"]
pycodestyle = ["-E402"]
[tool.flakeheaven.exceptions."**/__init__.py"]
pyflakes = ["-F401"]
[tool.flakeheaven.exceptions."wetterdienst/util/fsspec_monkeypatch.py"]
flake8-bugbear = ["-B301"]
[tool.flakeheaven.exceptions."example/"]
flake8-print = ["-*"]
[tool.flakeheaven.exceptions."tests/"]
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ decorator==5.1.1; python_version >= "3.6" and python_version < "4.0"
defusedxml==0.7.1; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6"
deprecation==2.1.0
dictdiffer==0.9.0
diskcache==5.4.0; python_version >= "3"
docformatter==1.4
docutils==0.16; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6"
dogpile.cache==1.1.5; python_version >= "3.6"
entrypoints==0.3; python_full_version >= "3.6.2" and python_full_version < "4.0.0" and python_version >= "3.6" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6")
eradicate==2.0.0; python_version >= "3.6" and python_version < "4.0"
execnet==1.9.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6"
Expand Down Expand Up @@ -94,7 +94,7 @@ pandas==1.3.5; python_full_version >= "3.7.1"
pandocfilters==1.5.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6"
pastel==0.2.1; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "4.0" or python_version >= "3.6" and python_version < "4.0" and python_full_version >= "3.4.0"
pathspec==0.9.0; python_full_version >= "3.6.2"
pbr==5.8.0; python_version >= "3.6"
pbr==5.8.0; python_version >= "3.7"
percy==2.0.2
pint==0.17; python_version >= "3.6"
pip-licenses==3.5.3; python_version >= "3.6" and python_version < "4.0"
Expand Down
2 changes: 2 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@
windows_unsupported = pytest.mark.skipif(
windows, reason="can't be tested under windows due to unsupported wradlib library"
)

mac_py39 = sys.platform == "darwin" and sys.version_info.major == 3 and sys.version_info.minor == 9
5 changes: 3 additions & 2 deletions tests/provider/dwd/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
DwdObservationPeriod,
DwdObservationResolution,
)
from wetterdienst.util.cache import CacheExpiry
from wetterdienst.util.network import list_remote_files_fsspec


Expand All @@ -29,8 +30,8 @@ def test_build_index_path():
@pytest.mark.remote
def test_list_files_of_climate_observations():
files_server = list_remote_files_fsspec(
"https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/" "annual/kl/recent",
recursive=False,
"https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/annual/kl/recent",
CacheExpiry.NO_CACHE,
)

assert (
Expand Down
3 changes: 3 additions & 0 deletions tests/ui/explorer/test_ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import pytest
from bs4 import BeautifulSoup

from tests import mac_py39


@pytest.mark.slow
@pytest.mark.cflake
Expand All @@ -37,6 +39,7 @@ def test_app_layout(wetterdienst_ui, dash_tre):
assert dash_tre.find_element("#graph")


@pytest.mark.skipif(mac_py39, reason="problem with selenium and mac on py39")
@pytest.mark.slow
@pytest.mark.cflake
@pytest.mark.explorer
Expand Down
9 changes: 9 additions & 0 deletions wetterdienst/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,15 @@
# Distributed under the MIT License. See LICENSE for more info.
__appname__ = "wetterdienst"


# TODO: MONKEY PATCH FSSPEC
# Helper whose only statement is the import below; calling it loads
# wetterdienst.util.fsspec_monkeypatch and does nothing else.
# NOTE(review): presumably that module patches fsspec at import time —
# confirm against the module itself.
def monkey_patch():
import wetterdienst.util.fsspec_monkeypatch


monkey_patch()


from wetterdienst.api import Wetterdienst
from wetterdienst.metadata.kind import Kind
from wetterdienst.metadata.parameter import Parameter
Expand Down
22 changes: 14 additions & 8 deletions wetterdienst/provider/dwd/forecast/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from urllib.parse import urljoin

import pandas as pd
import requests
from fsspec.implementations.cached import WholeFileCacheFileSystem
from fsspec.implementations.http import HTTPFileSystem
from requests import HTTPError

from wetterdienst.core.scalar.request import ScalarRequestCore
Expand Down Expand Up @@ -38,7 +39,7 @@
DWD_SERVER,
)
from wetterdienst.provider.dwd.metadata.datetime import DatetimeFormat
from wetterdienst.util.cache import metaindex_cache
from wetterdienst.util.cache import CacheExpiry, cache_dir
from wetterdienst.util.enumeration import parse_enumeration_from_template
from wetterdienst.util.geo import convert_dm_to_dd
from wetterdienst.util.network import list_remote_files_fsspec
Expand Down Expand Up @@ -273,7 +274,7 @@ def get_url_for_date(url: str, date: Union[datetime, DwdForecastDate]) -> str:
:param date: date used for filtering of the available files
:return: file url based on the filtering
"""
urls = list_remote_files_fsspec(url, recursive=False)
urls = list_remote_files_fsspec(url, CacheExpiry.NO_CACHE)

if date == DwdForecastDate.LATEST:
try:
Expand Down Expand Up @@ -325,7 +326,7 @@ class DwdMosmixRequest(ScalarRequestCore):
_dataset_base = DwdMosmixDataset
_unit_tree = DwdMosmixUnit

_url = "https://www.dwd.de/DE/leistungen/met_verfahren_mosmix/" "mosmix_stationskatalog.cfg?view=nasPublication"
_url = "https://www.dwd.de/DE/leistungen/met_verfahren_mosmix/mosmix_stationskatalog.cfg?view=nasPublication"

_colspecs = [
(0, 5),
Expand Down Expand Up @@ -477,18 +478,23 @@ def issue_end(self):
"""Required for typing"""
return self.issue_end

@metaindex_cache.cache_on_arguments()
def _all(self) -> pd.DataFrame:
"""
Create meta data DataFrame from available station list
:return:
"""
# TODO: Cache payload with FSSPEC
payload = requests.get(self._url, headers={"User-Agent": ""})

fs = WholeFileCacheFileSystem(
fs=HTTPFileSystem(client_kwargs={"headers": {"User-Agent": ""}}),
cache_storage=cache_dir,
expiry_time=CacheExpiry.METAINDEX.value,
)

payload = fs.cat(self._url)

df = pd.read_fwf(
StringIO(payload.text),
StringIO(payload.decode(encoding="latin-1")),
skiprows=4,
skip_blank_lines=True,
colspecs=self._colspecs,
Expand Down
16 changes: 5 additions & 11 deletions wetterdienst/provider/dwd/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
DWDCDCBase,
)
from wetterdienst.provider.dwd.observation.metadata.dataset import DwdObservationDataset
from wetterdienst.util.cache import fileindex_cache_five_minutes
from wetterdienst.util.cache import CacheExpiry
from wetterdienst.util.network import list_remote_files_fsspec


Expand All @@ -40,18 +40,12 @@ def _create_file_index_for_dwd_server(

url = reduce(urljoin, [DWD_SERVER, DWD_CDC_PATH, cdc_base.value, parameter_path])

if resolution in [Resolution.MINUTE_1] and period in [Period.HISTORICAL]:
recursive = True
else:
recursive = False
files_server = list_remote_files_fsspec(url, recursive=recursive)

return pd.DataFrame(files_server, columns=[DwdColumns.FILENAME.value], dtype=str)
files_server = list_remote_files_fsspec(url, ttl=CacheExpiry.TWELVE_HOURS)

if not files_server:
raise FileNotFoundError(f"url {url} does not have a list of files")

def reset_file_index_cache() -> None:
"""Function to reset the cached file index for all kinds of parameters"""
fileindex_cache_five_minutes.invalidate()
return pd.DataFrame(files_server, columns=[DwdColumns.FILENAME.value], dtype=str)


def build_path_to_parameter(
Expand Down
2 changes: 0 additions & 2 deletions wetterdienst/provider/dwd/observation/fileindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from wetterdienst.provider.dwd.metadata.datetime import DatetimeFormat
from wetterdienst.provider.dwd.observation.metadata.dataset import DwdObservationDataset
from wetterdienst.provider.dwd.observation.metadata.resolution import HIGH_RESOLUTIONS
from wetterdienst.util.cache import fileindex_cache_twelve_hours


def create_file_list_for_climate_observations(
Expand Down Expand Up @@ -52,7 +51,6 @@ def create_file_list_for_climate_observations(
return file_index[DwdColumns.FILENAME.value].values.tolist()


@fileindex_cache_twelve_hours.cache_on_arguments()
def create_file_index_for_climate_observations(
parameter_set: DwdObservationDataset,
resolution: Resolution,
Expand Down
Loading

0 comments on commit 3573ead

Please sign in to comment.