diff --git a/CHANGELOG.md b/CHANGELOG.md index 1317b2e2d..8f5db236c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- `CsvJobDatabase`: work around GeoPandas issue (on Python>3.9) when there is a column named "crs" ([#714](https://github.com/Open-EO/openeo-python-client/issues/714)) + ## [0.37.0] - 2025-01-21 - "SAP10" release diff --git a/openeo/extra/job_management/__init__.py b/openeo/extra/job_management/__init__.py index 41b8fdfd3..9e40b09fb 100644 --- a/openeo/extra/job_management/__init__.py +++ b/openeo/extra/job_management/__init__.py @@ -904,7 +904,8 @@ def read(self) -> pd.DataFrame: import geopandas # `df.to_csv()` in `persist()` has encoded geometries as WKT, so we decode that here. - df = geopandas.GeoDataFrame(df, geometry=geopandas.GeoSeries.from_wkt(df["geometry"])) + df.geometry = geopandas.GeoSeries.from_wkt(df["geometry"]) + df = geopandas.GeoDataFrame(df) return df def persist(self, df: pd.DataFrame): diff --git a/setup.py b/setup.py index 68906eccc..3d044bfb5 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,9 @@ "urllib3<2.3.0", # httpretty doesn't work properly with urllib3>=2.3.0. 
See #700 and https://github.com/gabrielfalcao/HTTPretty/issues/484 "netCDF4>=1.7.0", "matplotlib", # TODO: eliminate matplotlib as test dependency - "geopandas", + # TODO #717 Simplify geopandas constraints when Python 3.8 support is dropped + "geopandas>=0.14; python_version>='3.9'", + "geopandas", # Best-effort geopandas dependency for Python 3.8 "flake8>=5.0.0", "time_machine", "pyproj>=3.2.0", # Pyproj is an optional, best-effort runtime dependency diff --git a/tests/extra/job_management/test_job_management.py b/tests/extra/job_management/test_job_management.py index d88a589a3..46108f0fa 100644 --- a/tests/extra/job_management/test_job_management.py +++ b/tests/extra/job_management/test_job_management.py @@ -1,4 +1,5 @@ import copy +import datetime import json import logging import re @@ -7,7 +8,6 @@ from time import sleep from typing import Callable, Union from unittest import mock -import datetime import dirty_equals import geopandas @@ -40,6 +40,7 @@ ) from openeo.rest._testing import OPENEO_BACKEND, DummyBackend, build_capabilities from openeo.util import rfc3339 +from openeo.utils.version import ComparableVersion @pytest.fixture @@ -977,6 +978,30 @@ def test_initialize_from_df_on_exists_skip(self, tmp_path): ) assert set(db.read()["some_number"]) == {1, 2, 3} + @pytest.mark.skipif( + ComparableVersion(geopandas.__version__) < "0.14", + reason="This issue has no workaround with geopandas < 0.14 (highest available version on Python 3.8 is 0.13.2)", + ) + def test_read_with_crs_column(self, tmp_path): + """ + Having a column named "crs" can cause obscure error messages when creating a GeoPandas dataframe + https://github.com/Open-EO/openeo-python-client/issues/714 + """ + source_df = pd.DataFrame( + { + "crs": [1234], + "geometry": ["Point(2 3)"], + } + ) + path = tmp_path / "jobs.csv" + source_df.to_csv(path, index=False) + result_df = CsvJobDatabase(path).read() + assert isinstance(result_df, geopandas.GeoDataFrame) + assert result_df.to_dict(orient="list") 
== { + "crs": [1234], + "geometry": [shapely.geometry.Point(2, 3)], + } + class TestParquetJobDatabase: